* Set the working directory to the folder that holds the replication data.
* NOTE(review): this is a machine-specific absolute path; it has to be edited before
* the do-file is run on another computer.
cd "C:\Users\jedwab\Desktop\Replication files for Esch et al 2024\Data Analysis in Stata"

* The data were created using the do-file "Data creation 01262024".

*************************************************
*** SUMMARY STATISTICS REPORTED IN THE TEXT *****
*** SOME REGRESSIONS RESULTS REPORTED IN TEXT ***
*************************************************

******************************************
** COMPARISON OF WSF3D.V2 vs. WSF3D.V1 ***
******************************************

** For all economies **

* Country-level totals: WSF3D v1 (stock3D_old) against v2 (stock3D_new).
* "share" is the percentage by which the v1 total falls short of the v2 total.
use country3Dfull, clear
collapse (sum) stock3D_old stock3D_new
summarize stock3D_old stock3D_new
* v1: 1,558 billion m$^3$
* v2: 1,593 billion m$^3$
generate share = (1 - stock3D_old/stock3D_new)*100
summarize share
* 2.2 => 2 when rounded

* Same comparison over all cities
use city3Dfull, clear
collapse (sum) stock3D_old stock3D_new
generate share = (1 - stock3D_old/stock3D_new)*100
summarize share
* 4.5 => 5 when rounded

* Same comparison for cities at or above a population cutoff (variable P15).
* NOTE(review): "P15 >= cutoff" also keeps observations where P15 is missing,
* because Stata treats missing as larger than any number.
foreach minpop in 1000000 5000000 {
    use city3Dfull, clear
    keep if P15 >= `minpop'
    collapse (sum) stock3D_old stock3D_new
    generate share = (1 - stock3D_old/stock3D_new)*100
    summarize share
}
* Cutoff of 1 million: 7.0 => 7 when rounded
* Cutoff of 5 million: 8.9 => 9 when rounded

** For developing economies **

* "Developing" here means every observation whose incgroup2019 is not "H"
* (observations with a blank income group are therefore kept).

* Country-level totals
use country3Dfull, clear
drop if incgroup2019 == "H"
collapse (sum) stock3D_old stock3D_new
generate share = (1 - stock3D_old/stock3D_new)*100
summarize share
* 1.6 => 2 when rounded

* All cities
use city3Dfull, clear
drop if incgroup2019 == "H"
collapse (sum) stock3D_old stock3D_new
generate share = (1 - stock3D_old/stock3D_new)*100
summarize share
* 3.6 => 4 when rounded

* Cities at or above a population cutoff (variable P15)
foreach minpop in 1000000 5000000 {
    use city3Dfull, clear
    drop if incgroup2019 == "H"
    keep if P15 >= `minpop'
    collapse (sum) stock3D_old stock3D_new
    generate share = (1 - stock3D_old/stock3D_new)*100
    summarize share
}
* Cutoff of 1 million: 6.0 => 6 when rounded
* Cutoff of 5 million: 7.54 => 8 when rounded

** V2 vs. V1 for developing economies by region **

* The same steps are run on the country file (total volume) and on the city
* file (total volume for cities), so they are written once and looped.
foreach D in country3Dfull city3Dfull {
    use `D', clear
    drop if incgroup2019 == "H"
    * We create the main regional groups used in the figure.
    * The subregion lists do not overlap, so each observation gets at most one group.
    gen unregion = ""
    replace unregion = "asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
    replace unregion = "neoeuro" if inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
    replace unregion = "lac" if inlist(unsubregion, "Caribbean", "Central America", "South America")
    replace unregion = "africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
    * Show which countries fall outside the four groups before dropping them.
    tab country_wb if unregion == ""
    drop if unregion == ""
    collapse (sum) stock3D_old stock3D_new, by(unregion)
    gen share = (1-stock3D_old/stock3D_new)*100
    * One row per region: list the shares so each regional value can be read,
    * then summarize (which only shows the mean/min/max across the regions).
    list unregion share
    sum share
}

****************************************************
** MAIN DECOMPOSITION: COUNTRY: UPWARD VS OUTWARD **
****************************************************

use country3Dfull, clear
* Sanity check of the decomposition: the 2D and non-2D per-capita components
* are added up and correlated with the total per-capita stock.
* (The comparison variable "test" has to be listed in -corr-; with a single
* variable the check compares nothing.)
gen test = pcstock3D_2D+pcstock3D_no2D
corr pcstock3D_new test
drop test

***** FOR ALL ECONOMIES *****

* UNW = unweighted
* W = population-weighted
* Every regression uses lpcgdp2010s as the only regressor, with robust standard errors.

* Total stock and its 2D / non-2D components -- UNW
foreach y in pcstock3D_new pcstock3D_2D pcstock3D_no2D {
    regress `y' lpcgdp2010s, vce(robust)
}
* 112***, 40***, 73***
* Contribution change in area = 40/112*100 = about 35% (UNW)

* Total stock and its 2D / non-2D components -- W (weights: totpop)
foreach y in pcstock3D_new pcstock3D_2D pcstock3D_no2D {
    regress `y' lpcgdp2010s [aw=totpop], vce(robust)
}
* 138***, 53***, 85***
* Contribution change in area = 53/138*100 = about 38% (W)
* The mid-value is about 36.5% but we report 35% in the text

* Non-2D stock and its low-rise / high-rise components -- UNW
foreach y in pcstock3D_no2D pcstock3D_LRno2D pcstock3D_HR {
    regress `y' lpcgdp2010s, vce(robust)
}
* 73***, 67***, 5***
* Contribution change in low-rises = 67/73*100 = 92% (UNW)

* Non-2D stock and its low-rise / high-rise components -- W (weights: totpop)
foreach y in pcstock3D_no2D pcstock3D_LRno2D pcstock3D_HR {
    regress `y' lpcgdp2010s [aw=totpop], vce(robust)
}
* 84.6***, 79.6***, 5***
* Contribution change in low-rises = 79.6/84.6*100 = 94% (W)
* The mid-value is about 93% and we report 93% in the text

***** FOR DEVELOPING ECONOMIES ONLY *****

* Restrict the data currently in memory to non-high-income observations.
* H flags incgroup2019 == "H"; observations with a blank income group have H == 0 and are kept.
generate H = (incgroup2019 == "H")
keep if H == 0

* Total stock and its 2D / non-2D components -- without pop weights
foreach y in pcstock3D_new pcstock3D_2D pcstock3D_no2D {
    regress `y' lpcgdp2010s, vce(robust)
}
* 77.2***, 33.7***, 43.5***
* Contribution change in area = 33.7/77.2*100 = 44% (UNW)

* Total stock and its 2D / non-2D components -- with pop weights
foreach y in pcstock3D_new pcstock3D_2D pcstock3D_no2D {
    regress `y' lpcgdp2010s [aw=totpop], vce(robust)
}
* 103.7***, 37.4***, 66.3***
* Contribution change in area = 37.4/103.7*100 = about 36% (W)
* Using the mid-values between both, we get 40%

* Non-2D stock and its low-rise / high-rise components -- without pop weights
foreach y in pcstock3D_no2D pcstock3D_LRno2D pcstock3D_HR {
    regress `y' lpcgdp2010s, vce(robust)
}
* 43.5***, 41.0***, 2.5***
* Contribution change in low-rises = 41/43.5*100 = 94.3% (UNW)

* Non-2D stock and its low-rise / high-rise components -- with pop weights
foreach y in pcstock3D_no2D pcstock3D_LRno2D pcstock3D_HR {
    regress `y' lpcgdp2010s [aw=totpop], vce(robust)
}
* 66.3***, 62.8***, 3.5***
* Contribution change in low-rises = 62.8/66.3*100 = 94.7% (W)
* Using the mid-values between both, we get 94.5%
* We say that we find similar patterns

******************************************************************************************

*************
*************
** TABLES ***
*************
*************

***************
*** TABLE 1 ***
***************

***** 1. WORLD *****

* --- Totals (the "sum" variables) ---
use country3Dfull, clear
desc stock3D_new stock2D
collapse (sum) stock2D stock3D_new stock3D_2D stock3D_no2D
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
foreach v of varlist stock3D_new stock3D_2D {
    replace `v' = `v'*0.000000001
}
* The non-2D total is not carried into the table.
drop stock3D_no2D
* Constant key used to attach the single row of totals to the single row of means.
gen count = 1
sort count
save temp, replace

* --- Population-weighted means (the "mean" variables) ---
use country3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop]
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
gen count = 1
sort count
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 count using temp
drop _merge count
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = "World"
gen classification = "1. World"
order classification group
* temp now holds the first row of Table 1; later sections append to it.
save temp, replace

* --- Shares corrected manually in the table ---
* Here the stocks are summed first and the shares are computed from the sums,
* whereas the collapse above reports the weighted mean of country-level shares.
* Shares outward (2D) vs upward low-rises and high-rises
use country3Dfull, clear
desc stock3D_new stock2D
collapse (sum) stock3D_new stock3D_2D stock3D_no2D stock3D_HR stock3D_LR
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
gen shareLRin3D = 100-share2Din3D-shareHRin3D
* Variables are listed explicitly in the order 2D, low-rise, high-rise.
* NOTE(review): the values noted below appear to follow that order -- confirm.
sum share2Din3D shareLRin3D shareHRin3D
* 47 51 2

***** 2. DEVELOPED/DEVELOPING STATUS 2019 *****

* --- Totals (the "sum" variables) ---
use country3Dfull, clear
desc stock3D_new stock2D
* Countries without an income group are dropped so that the totals cover the
* same sample as the means and the ratio-of-sums shares computed further down
* in this section (otherwise they would be counted as developing in the totals only).
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
collapse (sum) stock2D stock3D_new, by(developed)
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
sort developed
save temp2, replace

* --- Population-weighted means (the "mean" variables) ---
use country3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
tab incgroup2019, m
tab country_wb if incgroup2019 == ""
* These are small countries, so they should not matter overall.
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(developed)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort developed
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 developed using temp2
drop _merge
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = "A. Developed Economies" if developed == 1
replace group = "B. Developing Economies" if developed == 0
drop developed
gen classification = "2. Developed/developing '19"
* Stack on top of the rows already stored in temp.
append using temp
order classification group
save temp, replace

* --- Shares corrected manually in the table ---
* Here the stocks are summed first and the shares are computed from the sums,
* whereas the collapse above reports the weighted mean of country-level shares.
* Shares outward (2D) vs upward low-rises and high-rises
use country3Dfull, clear
desc stock3D_new stock2D
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
collapse (sum) stock3D_new stock3D_2D stock3D_no2D stock3D_HR stock3D_LR, by(developed)
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
gen shareLRin3D = 100-share2Din3D-shareHRin3D
* Variables are listed explicitly in the order 2D, low-rise, high-rise.
* NOTE(review): the values noted below appear to follow that order -- confirm.
bysort developed: sum share2Din3D shareLRin3D shareHRin3D
* Developed: 43 54 3
* Developing: 50 49 2

***** 3. INCOME GROUP 2019 *****

* --- Totals (the "sum" variables) ---
use country3Dfull, clear
desc stock3D_new stock2D
drop if incgroup2019 == ""
collapse (sum) stock2D stock3D_new, by(incgroup2019)
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
sort incgroup2019
save temp2, replace

* --- Population-weighted means (the "mean" variables) ---
use country3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
tab incgroup2019, m
tab country_wb if incgroup2019 == ""
* These are small countries, so they should not matter overall.
drop if incgroup2019 == ""
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(incgroup2019)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort incgroup2019
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 incgroup2019 using temp2
drop _merge
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
* Row labels; the letter prefixes fix the order of the rows in the table.
* "LM" is the lower-middle-income group, so it is labelled as such.
gen group = "A. High-Income Economies" if incgroup2019 == "H"
replace group = "B. Upper-Middle-Income Economies" if incgroup2019 == "UM"
replace group = "C. Lower-Middle-Income Economies" if incgroup2019 == "LM"
replace group = "D. Low-Income Economies" if incgroup2019 == "L"
drop incgroup*
gen classification = "3. Income groups '19"
* Stack on top of the rows already stored in temp.
append using temp
order classification group
gsort +classification
save temp, replace

***** 4. WB REGIONS *****

* --- Totals (the "sum" variables) ---
use country3Dfull, clear
desc stock3D_new stock2D
collapse (sum) stock2D stock3D_new, by(wbregionshort)
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
sort wbregionshort
save temp2, replace

* --- Population-weighted means (the "mean" variables) ---
use country3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop wbregionshort
tab wbregionshort, m
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(wbregionshort)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort wbregionshort
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 wbregionshort using temp2
drop _merge
* The totals are rounded once (rounding them a second time changes nothing).
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = wbregionshort
drop wbregionshort
gen classification = "4. WB region"
* Stack on top of the rows already stored in temp.
append using temp
order group
sort group
save temp, replace

***** 5. UN REGIONS *****

* The region variable is built once; the totals are then computed inside a
* preserve/restore so that the means can be computed from the same data.
use country3Dfull, clear
desc stock3D_new stock2D
*Geographic regions: countries and areas are also grouped geographically into six major areas designated as: Africa; Asia; Europe; Latin America and the Caribbean; Northern America, and Oceania.
* We map each UN subregion to its region. The lists do not overlap.
* NOTE(review): the North America group is matched on the exact string "North America" -- confirm this is how the data spell it.
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* Check the mapping and show the subregions left without a region.
* Those stay in the data as a blank group and are only removed when the final table is assembled.
tab unsubregion unregion, m
tab unsubregion if unregion == ""

* --- Totals (the "sum" variables) ---
preserve
collapse (sum) stock2D stock3D_new, by(unregion)
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
sort unregion
save temp2, replace
restore

* --- Population-weighted means (the "mean" variables) ---
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unregion
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(unregion)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort unregion
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 unregion using temp2
drop _merge
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = unregion
drop unregion
gen classification = "5. UN region"
* Stack on top of the rows already stored in temp.
append using temp
order group
sort group
save temp, replace

***** 6. UN SUBREGIONS *****

* The region prefix is built once; the totals are then computed inside a
* preserve/restore so that the means can be computed from the same data.
use country3Dfull, clear
desc stock3D_new stock2D
*Geographic regions: countries and areas are also grouped geographically into six major areas designated as: Africa; Asia; Europe; Latin America and the Caribbean; Northern America, and Oceania.
* We map each UN subregion to its region. The lists do not overlap.
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* Check the mapping and show the subregions left without a region.
tab unsubregion unregion, m
tab unsubregion if unregion == ""
* Prefix each subregion with its region so that rows sort by region in the table.
replace unsubregion = unregion + " - " + unsubregion

* --- Totals (the "sum" variables) ---
preserve
collapse (sum) stock2D stock3D_new, by(unsubregion)
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
sort unsubregion
save temp2, replace
restore

* --- Population-weighted means (the "mean" variables) ---
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unsubregion
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(unsubregion)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort unsubregion
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 unsubregion using temp2
drop _merge
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = unsubregion
drop unsubregion
gen classification = "6. UN subregion"
* Stack on top of the rows already stored in temp.
append using temp
order group
* Remove the rows of observations that had no region / no subregion.
drop if group == "" | group == " - "
sort classification group
order classification group stock2D avght_new_div stock3D_new
* Full table, all groupings and variables. It is written to its own file:
* exporting it to table1.xls would be overwritten by the export just below.
export excel using "FinalTables\table1_full.xls", firstrow(variables) replace
* For the final table in the draft, we only use some groupings and variables
drop if classification == "4. WB region" | classification == "6. UN subregion"
* Upward share = low-rise share + high-rise share
gen shareUP = shareLRin3D+shareHRin3D
keep classification group stock3D_new pcstock3D_new share2Din3D shareUP shareLRin3D shareHRin3D
order classification group stock3D_new pcstock3D_new share2Din3D shareUP shareLRin3D shareHRin3D
export excel using "FinalTables\table1.xls", firstrow(variables) replace

**************************************
*** TABLE 2 - PANEL A: URBAN AREAS ***
**************************************

***** 1. WORLD *****

* --- Sum variables ---
use city3Dfull, clear
collapse (sum) stock2D stock3D_new stock3D_2D stock3D_no2D
desc stock3D_new stock2D
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
foreach v of varlist stock3D_new stock3D_2D {
    replace `v' = `v'*0.000000001
}
* The non-2D total is not carried into the table.
drop stock3D_no2D
* Constant key used to attach the single row of totals to the single row of means.
gen count = 1
sort count
save count, replace

* --- Mean variables (population-weighted) ---
use city3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop]
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
gen count = 1
sort count
* 1:1 merge (current syntax) on the variable count with the dataset count.dta;
* it errors out if the key is not unique on either side.
merge 1:1 count using count
drop _merge
drop count
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = "World"
gen classification = "1. World"
order classification group
* temp now holds the first row of this panel; later sections append to it.
save temp, replace

* --- Shares corrected manually in the table ---
* Here the stocks are summed first and the shares are computed from the sums,
* whereas the collapse above reports the weighted mean of city-level shares.
* Shares outward (2D) vs upward low-rises and high-rises
use city3Dfull, clear
desc stock3D_new stock2D
collapse (sum) stock3D_new stock3D_2D stock3D_no2D stock3D_HR stock3D_LR
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
gen shareLRin3D = 100-share2Din3D-shareHRin3D
* Variables are listed explicitly in the order 2D, low-rise, high-rise.
* NOTE(review): the values noted below appear to follow that order -- confirm.
sum share2Din3D shareLRin3D shareHRin3D
* 36 59 5

***** 2. DEVELOPED/DEVELOPING STATUS 2019 *****

* --- Sum variables ---
use city3Dfull, clear
* These are small countries, so they should not matter overall.
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
collapse (sum) stock2D stock3D_new stock3D_2D stock3D_no2D, by(developed)
desc stock3D_new stock2D
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
foreach v of varlist stock3D_new stock3D_2D {
    replace `v' = `v'*0.000000001
}
* The non-2D total is not carried into the table.
drop stock3D_no2D
sort developed
save count, replace

* --- Mean variables (population-weighted) ---
use city3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
tab incgroup2019, m
tab country_wb if incgroup2019 == ""
* These are small countries, so they should not matter overall.
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(developed)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort developed
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 developed using count
drop _merge
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = "A. Developed Economies" if developed == 1
replace group = "B. Developing Economies" if developed == 0
drop developed
gen classification = "2. Developed/developing '19"
* Stack on top of the rows already stored in temp.
append using temp
order classification group
save temp, replace

* --- Shares corrected manually in the table ---
* Here the stocks are summed first and the shares are computed from the sums,
* whereas the collapse above reports the weighted mean of city-level shares.
* Shares outward (2D) vs upward low-rises and high-rises
use city3Dfull, clear
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
desc stock3D_new stock2D
collapse (sum) stock3D_new stock3D_2D stock3D_no2D stock3D_HR stock3D_LR, by(developed)
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
gen shareLRin3D = 100-share2Din3D-shareHRin3D
* Variables are listed explicitly in the order 2D, low-rise, high-rise.
* NOTE(review): the values noted below appear to follow that order -- confirm,
* and confirm which of the two lines is the developed group.
bysort developed: sum share2Din3D shareLRin3D shareHRin3D
* 35 59 6
* 37 59 4

***** 3. INCOME GROUP 2019 *****

* --- Sum variables ---
use city3Dfull, clear
* These are small countries, so they should not matter overall.
drop if incgroup2019 == ""
collapse (sum) stock2D stock3D_new stock3D_2D stock3D_no2D, by(incgroup2019)
desc stock3D_new stock2D
* The stocks are in sq m (area) and cubic m (volume).
* We transform them into sq km and cubic km.
replace stock2D = stock2D/1000000
foreach v of varlist stock3D_new stock3D_2D {
    replace `v' = `v'*0.000000001
}
* The non-2D total is not carried into the table.
drop stock3D_no2D
sort incgroup2019
save count, replace

* --- Mean variables (population-weighted) ---
use city3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual once the 2D and high-rise shares are removed.
gen shareLRin3D = 100-share2Din3D-shareHRin3D
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
tab incgroup2019, m
tab country_wb if incgroup2019 == ""
* These are small countries, so they should not matter overall.
drop if incgroup2019 == ""
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [aw=totpop], by(incgroup2019)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Round to integers, except average height which keeps one decimal.
foreach v of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
    replace `v' = round(`v',1)
}
replace avght_new_div = round(avght_new_div,0.1)
sort incgroup2019
* 1:1 merge (current syntax): errors out if the key is not unique on either side.
merge 1:1 incgroup2019 using count
drop _merge
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
* Row labels; the letter prefixes fix the order of the rows in the table.
* "LM" is the lower-middle-income group, so it is labelled as such.
gen group = "A. High-Income Economies" if incgroup2019 == "H"
replace group = "B. Upper-Middle-Income Economies" if incgroup2019 == "UM"
replace group = "C. Lower-Middle-Income Economies" if incgroup2019 == "LM"
replace group = "D. Low-Income Economies" if incgroup2019 == "L"
drop incgroup*
gen classification = "3. Income groups '19"
* Stack on top of the rows already stored in temp.
append using temp
order classification group
gsort +classification
save temp, replace

***** 4. WB REGIONS *****

* Sum variables: total built-up area and volume of all cities, by World Bank region
use city3Dfull, clear
* Cities with no income group are in small countries, so they should not matter overall. 
* NOTE(review): this drop is applied to the totals only; the means below keep these cities. Confirm this is intended.
drop if incgroup2019 == ""
collapse (sum) stock2D stock3D_new stock3D_2D, by(wbregionshort)
desc stock3D_new stock2D
* Inputs are in sq m (stock2D) and cubic m (stock3D_*)
* We transform them into sq km and cubic km
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
replace stock3D_2D = stock3D_2D*0.000000001
sort wbregionshort
save count, replace

* Mean variables: population-weighted means across cities, by World Bank region
use city3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual
gen shareLRin3D = 100-share2Din3D-shareHRin3D
order country_wb pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop wbregionshort
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop wbregionshort
tab wbregionshort, m
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(wbregionshort)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Per capita values and shares are rounded to integers, average height to one decimal
foreach V of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
	replace `V' = round(`V',1)
}
replace avght_new_div = round(avght_new_div,0.1)
* Add the totals; 1:1 makes Stata check that each region appears once in both files
merge 1:1 wbregionshort using count, nogenerate
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = wbregionshort
drop wbregionshort 
gen classification = "4. WB region"
* Stack on top of the rows from the previous classifications
append using temp
order group
sort group
save temp, replace

***** 5. UN REGIONS *****

* Sum variables: total built-up area and volume of all cities, by UN region
use city3Dfull, clear
* Cities with no income group are in small countries, so they should not matter overall. 
* NOTE(review): this drop is applied to the totals only; the means below keep these cities. Confirm this is intended.
drop if incgroup2019 == ""
*Geographic regions: countries and areas are also grouped geographically into six major areas designated as: Africa; Asia; Europe; Latin America and the Caribbean; Northern America, and Oceania.
* We assign each UN subregion to its region
* NOTE(review): Pacific only covers Melanesia and Australia/New Zealand; any other Oceania subregion present in the data stays unassigned. Check the tabulations below.
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* Check the mapping and list any subregion left without a region
tab unsubregion unregion, m
tab unsubregion if unregion == ""
collapse (sum) stock2D stock3D_new stock3D_2D, by(unregion)
desc stock3D_new stock2D
* Inputs are in sq m (stock2D) and cubic m (stock3D_*)
* We transform them into sq km and cubic km
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
replace stock3D_2D = stock3D_2D*0.000000001
sort unregion
save count, replace

* Mean variables: population-weighted means across cities, by UN region
use city3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual
gen shareLRin3D = 100-share2Din3D-shareHRin3D
* Same subregion-to-region mapping as for the totals above
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
tab unsubregion unregion, m
tab unsubregion if unregion == ""
order country_wb pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unregion
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unregion
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(unregion)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Per capita values and shares are rounded to integers, average height to one decimal
foreach V of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
	replace `V' = round(`V',1)
}
replace avght_new_div = round(avght_new_div,0.1)
* Add the totals; 1:1 makes Stata check that each region appears once in both files
merge 1:1 unregion using count, nogenerate
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = unregion
drop unregion
gen classification = "5. UN region"
* Stack on top of the rows from the previous classifications
append using temp
order group
sort group
save temp, replace

***** 6. UN SUBREGIONS *****

* Sum variables: total built-up area and volume of all cities, by UN subregion
use city3Dfull, clear
* Cities with no income group are in small countries, so they should not matter overall. 
* NOTE(review): this drop is applied to the totals only; the means below keep these cities. Confirm this is intended.
drop if incgroup2019 == ""
*Geographic regions: countries and areas are also grouped geographically into six major areas designated as: Africa; Asia; Europe; Latin America and the Caribbean; Northern America, and Oceania.
* We assign each UN subregion to its region
* NOTE(review): Pacific only covers Melanesia and Australia/New Zealand; any other Oceania subregion present in the data stays unassigned. Check the tabulations below.
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* Check the mapping and list any subregion left without a region
tab unsubregion unregion, m
tab unsubregion if unregion == ""
* Prefix the subregion with its region so that rows sort by region first
replace unsubregion = unregion + " - " + unsubregion
collapse (sum) stock2D stock3D_new stock3D_2D, by(unsubregion)
desc stock3D_new stock2D
* Inputs are in sq m (stock2D) and cubic m (stock3D_*)
* We transform them into sq km and cubic km
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new*0.000000001
replace stock3D_2D = stock3D_2D*0.000000001
sort unsubregion
save count, replace

* Mean variables: population-weighted means across cities, by UN subregion
use city3Dfull, clear
gen share2Din3D = stock3D_2D/stock3D_new*100
gen shareHRin3D = stock3D_HR/stock3D_new *100
* Low-rise share is the residual
gen shareLRin3D = 100-share2Din3D-shareHRin3D
* Same subregion-to-region mapping as for the totals above
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
tab unsubregion unregion, m
tab unsubregion if unregion == ""
replace unsubregion = unregion + " - " + unsubregion
order country_wb pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unsubregion
keep country_wb stock3D_new stock2D pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unsubregion
collapse (mean) pcstock3D_new pcstock2D avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(unsubregion)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
* Per capita values and shares are rounded to integers, average height to one decimal
foreach V of varlist pcstock3D_new pcstock2D pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D {
	replace `V' = round(`V',1)
}
replace avght_new_div = round(avght_new_div,0.1)
* Add the totals; 1:1 makes Stata check that each subregion appears once in both files
merge 1:1 unsubregion using count, nogenerate
replace stock3D_new = round(stock3D_new,1)
replace stock2D = round(stock2D,1)
gen group = unsubregion
drop unsubregion
gen classification = "6. UN subregion"
* Stack on top of the rows from the previous classifications
append using temp
order group
* Rows with an empty group (e.g. cities left without a UN region in section 5) are removed
drop if group == ""
sort classification group
order classification group stock2D avght_new_div stock3D_new
* Full table with all classifications and variables
* NOTE(review): the second export below writes to the same file, so this full version is overwritten.
export excel using "FinalTables\table2panelA.xls", firstrow(variables) replace
* For the final table in the draft, we only use some groupings and variables
drop if classification == "4. WB region" | classification == "6. UN subregion"
* Upward share = low-rises + high-rises
gen shareUP = shareLRin3D+shareHRin3D
keep classification group stock3D_new pcstock3D_new share2Din3D shareUP shareLRin3D shareHRin3D
order classification group stock3D_new pcstock3D_new share2Din3D shareUP shareLRin3D shareHRin3D
export excel using "FinalTables\table2panelA.xls", firstrow(variables) replace

************************************
*** TABLE 2 PANEL B: RURAL AREAS ***
************************************

***** 1. WORLD *****

* Rural 
* We remove the city totals from the country totals 

* City totals by country
use city3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop AREA stock3D_HR, by(incgroup2019 country_wb)
ren totpop totpop_city
ren AREA totarea_city
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_city
}
sort country_wb
save temp2, replace 
count
* 184 countries with cities

* Country totals
use country3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop2015 landareasqkm stock3D_HR, by(incgroup2019 country_wb)
ren totpop2015 totpop_cntry
ren landareasqkm totarea_cntry
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_cntry
}
count
* 1:1 makes Stata check that each country appears once in both files; unmatched countries are kept
merge 1:1 country_wb using temp2
tab _merge
drop _merge
* Many do not have cities, so they are fully rural
foreach X of varlist *city {
	replace `X' = 0 if `X' == .
}
* Variables for rural (country minus city)
foreach X in totpop totarea stock3D_new stock3D_old stock2D stock3D_HR {
	gen `X'_rural = `X'_cntry-`X'_city
}
sum tot*_rural, d
gsort -totarea_rural
* Negative rural population or area is set to 0
tab country_wb if totpop_rural < 0 
tab country_wb if totarea_rural < 0 
foreach X of varlist totpop_rural totarea_rural {
	replace `X' = 0 if `X' < 0
}
* Volume per capita for each of country, city and rural
foreach X in cntry city rural {
	gen pcstock3D_new_`X' = stock3D_new_`X'/totpop_`X'
	sum pcstock3D_new_`X' [w=totpop_`X']
}
* 216, 192, 238
* In the draft, we reported 193 and 240, which is very close (from another part of the do-file below) 
foreach X in cntry city rural {
	gen pcstock2D_new_`X' = stock2D_`X'/totpop_`X'
	gen avght_new_div_`X' = stock3D_new_`X'/stock2D_`X'
	* 2D volume = built-up area times 2.5
	gen stock3D_2D_`X' = stock2D_`X'*2.5
	gen pcstock3D_2D_`X' = stock3D_2D_`X'/totpop_`X'
	gen pcstock3D_no2D_`X' = pcstock3D_new_`X'-pcstock3D_2D_`X'
	gen share2Din3D_`X' = stock3D_2D_`X'/stock3D_new_`X'*100
	gen shareHRin3D_`X' = stock3D_HR_`X'/stock3D_new_`X' *100
	gen shareLRin3D_`X' = 100-share2Din3D_`X'-shareHRin3D_`X'
}
* Variables we need 
keep country_wb stock2D* stock3D_new* pcstock3D_new* pcstock2D_new* avght_new_div* pcstock3D_2D* pcstock3D_no2D* share2D* shareLR* shareHR* totpop* incgroup2019
* One row per country and area type; citysize takes the values _cntry, _city and _rural
* (totarea is not a stub here because no totarea_* variable is kept above)
reshape long stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D totpop, i(incgroup2019 country_wb) j(citysize) string
order country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
keep country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019 citysize
save temp2, replace
* Totals by area type
collapse (sum) stock2D stock3D_new, by(citysize)
sort citysize
save count, replace
* Population-weighted means across countries by area type
use temp2, clear
collapse (mean) pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(citysize)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
replace pcstock3D_new = round(pcstock3D_new,1)
replace pcstock2D_new = round(pcstock2D_new,1)
ren pcstock2D_new pcstock2D
replace pcstock3D_2D = round(pcstock3D_2D,1)
replace pcstock3D_no2D = round(pcstock3D_no2D,1)
replace avght_new_div = round(avght_new_div,0.1)
replace share2Din3D = round(share2Din3D,1)
replace shareLRin3D = round(shareLRin3D,1)
replace shareHRin3D = round(shareHRin3D,1)
gen group = "World"
gen classification = "1. World"
order classification group
merge 1:1 citysize using count, nogenerate
* Only the rural row goes into Panel B
keep if citysize == "_rural"
save temp_rural, replace

***** 2. DEVELOPED/DEVELOPING STATUS 2019 *****

* Rural 
* We remove the city totals from the country totals 

* City totals by country
use city3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop AREA stock3D_HR, by(incgroup2019 country_wb)
ren totpop totpop_city
ren AREA totarea_city
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_city
}
sort country_wb
save temp2, replace 
count
* 184 countries with cities

* Country totals
use country3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop2015 landareasqkm stock3D_HR, by(incgroup2019 country_wb)
ren totpop2015 totpop_cntry
ren landareasqkm totarea_cntry
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_cntry
}
count
* 243 countries
* 1:1 makes Stata check that each country appears once in both files; unmatched countries are kept
merge 1:1 country_wb using temp2
tab _merge
drop _merge
* Many do not have cities, so they are fully rural
foreach X of varlist *city {
	replace `X' = 0 if `X' == .
}
* Variables for rural (country minus city)
foreach X in totpop totarea stock3D_new stock3D_old stock2D stock3D_HR {
	gen `X'_rural = `X'_cntry-`X'_city
}
sum tot*_rural, d
gsort -totarea_rural
* Negative rural population or area is set to 0
tab country_wb if totpop_rural < 0 
tab country_wb if totarea_rural < 0 
foreach X of varlist totpop_rural totarea_rural {
	replace `X' = 0 if `X' < 0
}
* Volume per capita for each of country, city and rural
foreach X in cntry city rural {
	gen pcstock3D_new_`X' = stock3D_new_`X'/totpop_`X'
	sum pcstock3D_new_`X' [w=totpop_`X']
}
* 216, 192, 238
* In the draft, we reported 193 and 240, which is very close (from another part of the do-file below) 
foreach X in cntry city rural {
	gen pcstock2D_new_`X' = stock2D_`X'/totpop_`X'
	gen avght_new_div_`X' = stock3D_new_`X'/stock2D_`X'
	* 2D volume = built-up area times 2.5
	gen stock3D_2D_`X' = stock2D_`X'*2.5
	gen pcstock3D_2D_`X' = stock3D_2D_`X'/totpop_`X'
	gen pcstock3D_no2D_`X' = pcstock3D_new_`X'-pcstock3D_2D_`X'
	gen share2Din3D_`X' = stock3D_2D_`X'/stock3D_new_`X'*100
	gen shareHRin3D_`X' = stock3D_HR_`X'/stock3D_new_`X' *100
	gen shareLRin3D_`X' = 100-share2Din3D_`X'-shareHRin3D_`X'
}
* Variables we need 
keep country_wb stock2D* stock3D_new* pcstock3D_new* pcstock2D_new* avght_new_div* pcstock3D_2D* pcstock3D_no2D* share2D* shareLR* shareHR* totpop* incgroup2019
* One row per country and area type; citysize takes the values _cntry, _city and _rural
* country_w must not be a stub: it would match the i() variable country_wb
* (totarea is not a stub either because no totarea_* variable is kept above)
reshape long stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D totpop, i(incgroup2019 country_wb) j(citysize) string
order country_wb pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
keep country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019 citysize
tab incgroup2019, m
tab country_wb if incgroup2019 == ""
* These are small countries, so they should not matter overall. 
drop if incgroup2019 == ""
gen developed = (incgroup2019 == "H")
save temp2, replace
* Totals by status and area type
collapse (sum) stock2D stock3D_new, by(developed citysize)
sort developed citysize
save count, replace
* Population-weighted means across countries by status and area type
use temp2, clear
collapse (mean) pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(developed citysize)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
replace pcstock3D_new = round(pcstock3D_new,1)
replace pcstock2D_new = round(pcstock2D_new,1)
ren pcstock2D_new pcstock2D
replace pcstock3D_2D = round(pcstock3D_2D,1)
replace pcstock3D_no2D = round(pcstock3D_no2D,1)
* Average height keeps one decimal, as in the other classifications
replace avght_new_div = round(avght_new_div,0.1)
replace share2Din3D = round(share2Din3D,1)
replace shareLRin3D = round(shareLRin3D,1)
replace shareHRin3D = round(shareHRin3D,1)
gen group = "A. Developed Economies" if developed == 1
replace group = "B. Developing Economies" if developed == 0
gen classification = "2. Developed/developing '19"
merge 1:1 developed citysize using count, nogenerate
drop developed 
* Only the rural rows go into Panel B
keep if citysize == "_rural"
order classification group
* Stack on top of the rural rows from the previous classification
append using temp_rural
order classification group
save temp_rural, replace

***** 3. INCOME GROUP 2019 *****

* Rural 
* We remove the city totals from the country totals 

* City totals by country
use city3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop AREA stock3D_HR, by(incgroup2019 country_wb)
ren totpop totpop_city
ren AREA totarea_city
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_city
}
sort country_wb
save temp2, replace 
count
* 184 countries with cities

* Country totals
use country3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop2015 landareasqkm stock3D_HR, by(incgroup2019 country_wb)
ren totpop2015 totpop_cntry
ren landareasqkm totarea_cntry
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_cntry
}
count
* 243 countries
* 1:1 makes Stata check that each country appears once in both files; unmatched countries are kept
merge 1:1 country_wb using temp2
tab _merge
drop _merge
* Many do not have cities, so they are fully rural
foreach X of varlist *city {
	replace `X' = 0 if `X' == .
}
* Variables for rural (country minus city)
foreach X in totpop totarea stock3D_new stock3D_old stock2D stock3D_HR {
	gen `X'_rural = `X'_cntry-`X'_city
}
sum tot*_rural, d
gsort -totarea_rural
* Negative rural population or area is set to 0
tab country_wb if totpop_rural < 0 
tab country_wb if totarea_rural < 0 
foreach X of varlist totpop_rural totarea_rural {
	replace `X' = 0 if `X' < 0
}
* Volume per capita for each of country, city and rural
foreach X in cntry city rural {
	gen pcstock3D_new_`X' = stock3D_new_`X'/totpop_`X'
	sum pcstock3D_new_`X' [w=totpop_`X']
}
* 216, 192, 238
* In the draft, we reported 193 and 240, which is very close (from another part of the do-file below) 
foreach X in cntry city rural {
	gen pcstock2D_new_`X' = stock2D_`X'/totpop_`X'
	gen avght_new_div_`X' = stock3D_new_`X'/stock2D_`X'
	* 2D volume = built-up area times 2.5
	gen stock3D_2D_`X' = stock2D_`X'*2.5
	gen pcstock3D_2D_`X' = stock3D_2D_`X'/totpop_`X'
	gen pcstock3D_no2D_`X' = pcstock3D_new_`X'-pcstock3D_2D_`X'
	gen share2Din3D_`X' = stock3D_2D_`X'/stock3D_new_`X'*100
	gen shareHRin3D_`X' = stock3D_HR_`X'/stock3D_new_`X' *100
	gen shareLRin3D_`X' = 100-share2Din3D_`X'-shareHRin3D_`X'
	desc stock3D_new_`X' stock2D_`X'
	* Stocks stay in cubic m and sq m here; no unit conversion is done in this step
}
sum stock2D_* stock3D_new_*
sum pcstock2D_* pcstock3D_new_*
* Variables we need 
keep country_wb stock2D_* stock3D_new_* pcstock3D_new* pcstock2D_new* avght_new_div* pcstock3D_2D* pcstock3D_no2D* share2D* shareLR* shareHR* totpop* incgroup2019
* One row per country and area type; citysize takes the values _cntry, _city and _rural
* (totarea is not a stub here because no totarea_* variable is kept above)
reshape long stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D totpop, i(incgroup2019 country_wb) j(citysize) string
order country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019
keep country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop incgroup2019 citysize
tab incgroup2019, m
tab country_wb if incgroup2019 == ""
* These are small countries, so they should not matter overall. 
drop if incgroup2019 == ""
save temp2, replace
* Totals by income group and area type
collapse (sum) stock2D stock3D_new, by(incgroup2019 citysize)
sort incgroup2019 citysize
save count, replace
* Population-weighted means across countries by income group and area type
use temp2, clear
collapse (mean) pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(incgroup2019 citysize)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
replace pcstock3D_new = round(pcstock3D_new,1)
replace pcstock2D_new = round(pcstock2D_new,1)
ren pcstock2D_new pcstock2D
replace pcstock3D_2D = round(pcstock3D_2D,1)
replace pcstock3D_no2D = round(pcstock3D_no2D,1)
replace avght_new_div = round(avght_new_div,0.1)
replace share2Din3D = round(share2Din3D,1)
replace shareLRin3D = round(shareLRin3D,1)
replace shareHRin3D = round(shareHRin3D,1)
gen group = "A. High-Income Economies" if incgroup2019 == "H"
replace group = "B. Upper-Middle-Income Economies" if incgroup2019 == "UM"
* "LM" is the lower-middle-income group
replace group = "C. Lower-Middle-Income Economies" if incgroup2019 == "LM"
replace group = "D. Low-Income Economies" if incgroup2019 == "L"
merge 1:1 incgroup2019 citysize using count, nogenerate
drop incgroup* 
gen classification = "3. Income groups '19"
* Only the rural rows go into Panel B
keep if citysize == "_rural"
* Stack on top of the rural rows from the previous classifications
append using temp_rural
save temp_rural, replace

***** 4. UN REGION *****

* Rural 
* We remove the city totals from the country totals 

* City totals by country
use city3Dfull, clear
* We assign each UN subregion to its region
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
collapse (sum) stock3D_new stock3D_old stock2D totpop AREA stock3D_HR, by(unregion country_wb)
ren totpop totpop_city
ren AREA totarea_city
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_city
}
sort country_wb
save temp2, replace 
count
* 184 countries with cities

* Country totals
use country3Dfull, clear
* Same subregion-to-region mapping as for the cities above
* NOTE(review): Pacific only covers Melanesia and Australia/New Zealand. Countries in any other
* Oceania subregion keep an empty unregion and are removed by the drop on empty group below. Confirm this is intended.
gen unregion = ""
replace unregion = "Asia" if inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia")
replace unregion = "Pacific" if inlist(unsubregion, "Melanesia", "Australia/New Zealand")
replace unregion = "Europe" if inlist(unsubregion, "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe")
replace unregion = "Northamerica" if unsubregion == "North America"
replace unregion = "LAC" if inlist(unsubregion, "Caribbean", "Central America", "South America")
replace unregion = "Africa" if inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
collapse (sum) stock3D_new stock3D_old stock2D totpop2015 landareasqkm stock3D_HR, by(unregion country_wb)
ren totpop2015 totpop_cntry
ren landareasqkm totarea_cntry
foreach V in stock3D_new stock3D_old stock2D stock3D_HR {
	ren `V' `V'_cntry
}
count
* 243 countries
* 1:1 makes Stata check that each country appears once in both files; unmatched countries are kept
* For matched countries the region from the country file is the one retained
merge 1:1 country_wb using temp2
tab _merge
drop _merge
* Many do not have cities, so they are fully rural
foreach X of varlist *city {
	replace `X' = 0 if `X' == .
}
* Variables for rural (country minus city)
foreach X in totpop totarea stock3D_new stock3D_old stock2D stock3D_HR {
	gen `X'_rural = `X'_cntry-`X'_city
}
sum tot*_rural, d
gsort -totarea_rural
* Negative rural population or area is set to 0
tab country_wb if totpop_rural < 0 
tab country_wb if totarea_rural < 0 
foreach X of varlist totpop_rural totarea_rural {
	replace `X' = 0 if `X' < 0
}
* Volume per capita for each of country, city and rural
foreach X in cntry city rural {
	gen pcstock3D_new_`X' = stock3D_new_`X'/totpop_`X'
	sum pcstock3D_new_`X' [w=totpop_`X']
}
* 216, 192, 238
* In the draft, we reported 193 and 240, which is very close (from another part of the do-file below) 
foreach X in cntry city rural {
	gen pcstock2D_new_`X' = stock2D_`X'/totpop_`X'
	gen avght_new_div_`X' = stock3D_new_`X'/stock2D_`X'
	* 2D volume = built-up area times 2.5
	gen stock3D_2D_`X' = stock2D_`X'*2.5
	gen pcstock3D_2D_`X' = stock3D_2D_`X'/totpop_`X'
	gen pcstock3D_no2D_`X' = pcstock3D_new_`X'-pcstock3D_2D_`X'
	gen share2Din3D_`X' = stock3D_2D_`X'/stock3D_new_`X'*100
	gen shareHRin3D_`X' = stock3D_HR_`X'/stock3D_new_`X' *100
	gen shareLRin3D_`X' = 100-share2Din3D_`X'-shareHRin3D_`X'
	desc stock3D_new_`X' stock2D_`X'
	* Stocks stay in cubic m and sq m here; no unit conversion is done in this step
}
sum stock2D_* stock3D_new_*
sum pcstock2D_* pcstock3D_new_*
* Variables we need 
keep country_wb stock2D_* stock3D_new_* pcstock3D_new* pcstock2D_new* avght_new_div* pcstock3D_2D* pcstock3D_no2D* share2D* shareLR* shareHR* totpop* unregion
* One row per country and area type; citysize takes the values _cntry, _city and _rural
* (totarea is not a stub here because no totarea_* variable is kept above)
reshape long stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D totpop, i(unregion country_wb) j(citysize) string
order country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unregion
keep country_wb stock2D stock3D_new pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* totpop unregion citysize
save temp2, replace
* Totals by region and area type
collapse (sum) stock2D stock3D_new, by(unregion citysize)
sort unregion citysize
save count, replace
* Population-weighted means across countries by region and area type
use temp2, clear
collapse (mean) pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHRin3D [w=totpop], by(unregion citysize)
label var share2Din3D "Share of 2D volume in total 3D volume"
label var shareHRin3D "Share of high-rises in total 3D volume"
replace pcstock3D_new = round(pcstock3D_new,1)
replace pcstock2D_new = round(pcstock2D_new,1)
ren pcstock2D_new pcstock2D
replace pcstock3D_2D = round(pcstock3D_2D,1)
replace pcstock3D_no2D = round(pcstock3D_no2D,1)
replace avght_new_div = round(avght_new_div,0.1)
replace share2Din3D = round(share2Din3D,1)
replace shareLRin3D = round(shareLRin3D,1)
replace shareHRin3D = round(shareHRin3D,1)
merge 1:1 unregion citysize using count, nogenerate
gen group = unregion
* Countries left without a region are removed here
drop if group == ""
drop unregion
gen classification = "4. UN regions"
* Only the rural rows go into Panel B
keep if citysize == "_rural"
* Stack on top of the rural rows from the previous classifications
append using temp_rural
save temp_rural, replace

***** CREATING THE FINAL TABLE *****

* Rural rows stacked in temp_rural, one per classification and group *
use temp_rural, clear
order classification group
gsort +classification
sort classification group citysize
order classification
*keep if citysize == "0. 0-50"
* Built-up area expressed in sq km, volume in cubic km
replace stock2D = stock2D/1000000
replace stock3D_new = stock3D_new/1000000000
* Totals and the high-rise share are rounded to integers
foreach V of varlist stock2D stock3D_new shareHRin3D {
	replace `V' = round(`V',1)
}
order classification group citysize  stock2D avght_new_div stock3D_new
* Full table; the second export below writes to the same file
export excel using "FinalTables\table2panelB.xls", firstrow(variables) replace
* For the final table in the draft, we only use some groupings and variables
drop if inlist(classification, "4. WB region", "6. UN subregion")
* Upward share = low-rises + high-rises
generate shareUP = shareLRin3D+shareHRin3D
keep classification group stock3D_new pcstock3D_new share2Din3D shareUP shareLRin3D shareHRin3D
order classification group stock3D_new pcstock3D_new share2Din3D shareUP shareLRin3D shareHRin3D
export excel using "FinalTables\table2panelB.xls", firstrow(variables) replace

******************************************************************************************

**************
**************
** FIGURES ***
**************
**************

***************
** FIGURE 1 ***
***************

* See the folder "WSF3Dv2 Creation"

***************
** FIGURE 2 ***
***************

* See the folder "WSF3Dv2 Creation"

***************
** FIGURE 3 ***
***************

* Was created using Powerpoint and then exported to png
* "schematic.png"

****************
*** FIGURE 4 ***
****************

* Country-level totals over all cities: summed volume (total, 2D part, non-2D
* part) and summed city population P15, plus volume per city inhabitant.
* Saved to temp_city, sorted by country_wb for the merges that follow.
use city3Dfull, clear
collapse (sum) stock3D_new stock3D_2D stock3D_no2D P15, by(country_wb)
generate stockpc_city = stock3D_new/P15
summarize stockpc_city
rename stock3D_new  stock_city
rename P15          totpop_city
rename stock3D_2D   stock3D_2D_city
rename stock3D_no2D stock3D_no2D_city
* Sanity check: the two components should add up to the total volume
generate chk_components = stock3D_2D_city + stock3D_no2D_city
correlate chk_components stock_city
drop chk_components
keep country_wb stock_city stock3D_2D_city stock3D_no2D_city stockpc_city totpop_city
sort country_wb
save temp_city, replace

* Country-level totals restricted to mega-cities (P15 >= 5 million): summed
* volume, summed population and volume per mega-city inhabitant.
* Saved to temp_megacity, sorted by country_wb.
use city3Dfull, clear
keep if P15 >= 5000000
collapse (sum) stock3D_new P15, by(country_wb)
generate stockpc_megacity = stock3D_new/P15
summarize stockpc_megacity
* 193 cubic m per capita
rename stock3D_new stock_megacity
rename P15         totpop_megacity
keep country_wb stock_megacity stockpc_megacity totpop_megacity
sort country_wb
save temp_megacity, replace

* We obtain the mean volume per capita for all rural areas & small towns in a country
* Rural + small-town figures are the residual: country totals minus the city
* totals stored in temp_city. Result is saved to temp_rural.
use country3Dfull, clear
collapse (sum) stock3D_new stock3D_2D stock3D_no2D totpop2015, by(country_wb) 
ren stock3D_new stock_cntry
ren totpop2015 totpop_cntry
ren stock3D_2D stock3D_2D_cntry
ren stock3D_no2D stock3D_no2D_cntry
* We do so by removing the total for urban areas from the total using all localities (i.e., the data at the country level).
sort country_wb
merge country_wb using temp_city
tab _m
drop _m
* Countries absent from temp_city have no city rows: their city totals are 0.
replace stock_city = 0 if stock_city == .
replace stock3D_2D_city = 0 if stock3D_2D_city == .
replace stock3D_no2D_city = 0 if stock3D_no2D_city == .
* Fix: the city population must be zero-filled as well. Previously only the
* stocks were, so totpop_rural (and hence stockpc_rural) stayed missing for
* countries without cities even though their rural stock was defined.
replace totpop_city = 0 if totpop_city == .
gen stock_rural = stock_cntry-stock_city
sum stock_rural, d
replace stock_rural = 0 if stock_rural == .
gen stock3D_2D_rural = stock3D_2D_cntry-stock3D_2D_city
sum stock3D_2D_rural, d
replace stock3D_2D_rural = 0 if stock3D_2D_rural == .
gen stock3D_no2D_rural = stock3D_no2D_cntry-stock3D_no2D_city
sum stock3D_no2D_rural, d
replace stock3D_no2D_rural = 0 if stock3D_no2D_rural == .
gen totpop_rural = totpop_cntry-totpop_city
sum totpop_rural, d
* Negative residual populations are listed, then floored at 0
* (the per capita value is then missing because of the division by 0).
tab country_wb if totpop_rural < 0
replace totpop_rural = 0 if totpop_rural < 0
gen stockpc_rural = stock_rural/totpop_rural
sum stockpc_rural
* 240 cubic m per capita (value recorded before the totpop_city fix above)
keep country_wb stock*_rural stockpc_rural totpop_rural
sort country_wb
save temp_rural, replace

* Attach the city, rural and mega-city aggregates to the country-level sample.
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
correlate lpcgdp*
gsort -pcstock3D_new
order country_wb pcstock3D_new
* Same merge routine for each of the three temporary files, in this order
foreach aggfile in temp_city temp_rural temp_megacity {
    sort country_wb
    merge country_wb using `aggfile'
    tabulate _merge
    drop _merge
}

* Figure 4 draft: quadratic fits of volume per capita on log income for the
* whole country, mega-cities, all cities and rural areas + towns, over a
* population-weighted scatter with labels for the 50 most populous countries.
* Fixes relative to the earlier version:
*  - the mega-city fit is weighted by mega-city population (totpop_megacity)
*    instead of the population of all cities (totpop_city);
*  - the legend labels follow the plot order (1 overall, 2 mega-cities,
*    3 cities, 4 rural + towns). The legend itself remains switched off.
foreach X in pcstock3D_new {
    twoway ///
        (qfit `X' lpcgdp2010s [w=totpop], lcolor(blue) lpattern(solid) lwidth(medthick)) ///
        (qfit stockpc_megacity lpcgdp2010s [w=totpop_megacity], lcolor(orange) lpattern(longdash) lwidth(medthick)) ///
        (qfit stockpc_city lpcgdp2010s [w=totpop_city], lcolor(gs8) lpattern(longdash) lwidth(medthick)) ///
        (qfit stockpc_rural lpcgdp2010s [w=totpop_rural], lcolor(gs8) lpattern(dash) lwidth(medthick)) ///
        (scatter `X' lpcgdp2010s [w=totpop], msymbol(circle_hollow) mcolor(ltblue)) ///
        (scatter `X' lpcgdp2010s if rankpop <= 50, mlabel(ccode) mlabposition(9) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)), ///
        legend(off) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
        ytitle(Volume per capita (cubic meters per inh.), margin(medsmall)) ///
        xtitle(Log per capita GDP (cst 2017 intl dol, PPP), margin(small)) ///
        graphregion(margin(tiny)) ///
        legend(order(1 "Overall" 2 "Megacities" 3 "Cities" 4 "Rural+Towns") row(1))
}
* We modify manually - this is Figure 4
* (the exported png comes from the manually edited .gph file, not from the twoway call above)
graph use "volpc_econdvt_new.gph"
graph export "FinalFigures\volpc_econdvt_new.png", replace width(2620) height(1908)

** Regressions **
** FOR ALL ECONOMIES **
* Slope on log income of each per capita measure (own-population denominators)
foreach X of varlist pcstock3D_new stockpc_city stockpc_rural stockpc_megacity {
    regress `X' lpcgdp2010s, vce(robust)
}
* Same stocks divided by national population, so that the components are
* expressed per national inhabitant
foreach part in city rural megacity {
    generate pcstock_`part'_natpop = stock_`part'/totpop
}
foreach X of varlist pcstock3D_new pcstock_city_natpop pcstock_rural_natpop pcstock_megacity_natpop {
    regress `X' lpcgdp2010s, vce(robust)
}
* 112***, cities 51***, rural 70***
* share rural =  69.77304/112.3613*100 = 62%
* share megacities in urban = 34.96658/51.3414 = 68% (but we find a much lower share when using pop weights - see below)
* Population weighted
foreach X of varlist pcstock3D_new pcstock_city_natpop pcstock_rural_natpop pcstock_megacity_natpop {
    regress `X' lpcgdp2010s [w=totpop], vce(robust)
}
* 138***, cities 59***, rural 79***
* share rural =  78.66742/137.6626*100 = 57%
* We say about 60% for the share of rural
* share megacities in urban = 38% (we report this one since it is consistent with the figure where we use some pop weights)

** Decomposition upward vs outward - RURAL **
foreach comp in 2D no2D {
    generate pcstock3D_`comp'_rural_natpop = stock3D_`comp'_rural/totpop
}
* Unweighted
foreach X of varlist pcstock_rural_natpop pcstock3D_2D_rural_natpop pcstock3D_no2D_rural_natpop {
    regress `X' lpcgdp2010s, vce(robust)
}
* Share of upward = 40.20107/78.66742 *100 = 62.4%
* NOTE(review): 78.66742 is the weighted rural slope recorded above and
* 69.77304 the unweighted one, so this ratio and the next one look swapped
* between the unweighted and weighted runs - confirm against the log.
* Population weighted
foreach X of varlist pcstock_rural_natpop pcstock3D_2D_rural_natpop pcstock3D_no2D_rural_natpop {
    regress `X' lpcgdp2010s [w=totpop], vce(robust)
}
* Share of upward = 43.48777/69.77304*100 = 57.6%
* We say around 60%
** Decomposition upward vs outward - URBAN **
foreach comp in 2D no2D {
    generate pcstock3D_`comp'_city_natpop = stock3D_`comp'_city/totpop
}
* Unweighted
foreach X of varlist pcstock_city_natpop pcstock3D_2D_city_natpop pcstock3D_no2D_city_natpop {
    regress `X' lpcgdp2010s, vce(robust)
}
* Share of upward = 75%
* Population weighted
foreach X of varlist pcstock_city_natpop pcstock3D_2D_city_natpop pcstock3D_no2D_city_natpop {
    regress `X' lpcgdp2010s [w=totpop], vce(robust)
}
* Share of upward = 70%
* We say around 70%

** Regressions **
** FOR DEVELOPING ECONOMIES (incgroup2019 != "H") **
* Unweighted
foreach X of varlist pcstock3D_new pcstock_city_natpop pcstock_rural_natpop pcstock_megacity_natpop {
    regress `X' lpcgdp2010s if incgroup2019 != "H", vce(robust)
}
* share rural = 47.32374/77.18318*100 = 61%
* share megacities in urban = 13.38983/31.85983*100 = 42%
* Population weighted
foreach X of varlist pcstock3D_new pcstock_city_natpop pcstock_rural_natpop pcstock_megacity_natpop {
    regress `X' lpcgdp2010s if incgroup2019 != "H" [w=totpop], vce(robust)
}
* share rural =  53.04019/103.715*100 = 51%
* share megacities in urban = 15.46461/50.68056*100 = 31%

** Decomposition upward vs outward - RURAL **
* Unweighted
foreach X of varlist pcstock_rural_natpop pcstock3D_2D_rural_natpop pcstock3D_no2D_rural_natpop {
    regress `X' lpcgdp2010s if incgroup2019 != "H", vce(robust)
}
* Share of upward = 22.68883/47.32374*100 = 48%
* Population weighted
foreach X of varlist pcstock_rural_natpop pcstock3D_2D_rural_natpop pcstock3D_no2D_rural_natpop {
    regress `X' lpcgdp2010s if incgroup2019 != "H" [w=totpop], vce(robust)
}
* Share of upward = 28.71507/53.04019*100 = 54%
* This is 50% here 
** Decomposition upward vs outward - URBAN **
* Unweighted
foreach X of varlist pcstock_city_natpop pcstock3D_2D_city_natpop pcstock3D_no2D_city_natpop {
    regress `X' lpcgdp2010s if incgroup2019 != "H", vce(robust)
}
* Share of upward = 69%
* Population weighted
foreach X of varlist pcstock_city_natpop pcstock3D_2D_city_natpop pcstock3D_no2D_city_natpop {
    regress `X' lpcgdp2010s if incgroup2019 != "H" [w=totpop], vce(robust)
}
* Share of upward = 74%
* We say around 70%

* Appendix figure: local polynomial (degree 0, bandwidth 0.5) of volume per
* capita on log income, used to locate the income level after which volume
* per capita increases with income. A vertical line marks 8.3.
twoway ///
    (lpoly pcstock3D_new lpcgdp2010s [w=totpop], lcolor(blue) lpattern(solid) lwidth(medthick) degree(0)  bwidth(0.5)), ///
    xlabel(7(1)12) xmtick(7(0.1)12) xline(8.3)
graph export "FinalFigures\volpc_econdvt_new_appx.png", replace width(2620) height(1908)
* cut-off around 8.3
generate cutoff = exp(8.3)
summarize cutoff
* 4023.872
* Income groups (three classification vintages) of the countries below the cut-off
foreach vintage in 2019 2015 2010 {
    tabulate incgroup`vintage' if pcgdp2010s < 4023.872
}
* Mostly low-income countries
* https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups#:~:text=For%20the%20current%202024%20fiscal,those%20with%20a%20GNI%20per
* About 1K cut-off for LICs, 4K for MICs

****************
*** FIGURE 5 ***
****************

* Figure 5: population-weighted subregional means of volume per capita and
* log income, plotted by broad regional group.
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
collapse (mean) pcstock3D_new lpcgdp2010s [w=totpop], by(unsubregion)
* Total population of each U.N. subregion comes from totpopsubregion
sort unsubregion
merge unsubregion using totpopsubregion
tabulate _merge
drop _merge
* Indicators for the four broad groups used in the figure
generate asia    = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
generate neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
generate lac     = inlist(unsubregion, "Caribbean", "Central America", "South America")
generate africa  = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* unregion holds the group name; it stays empty for subregions in no group
generate unregion = ""
foreach grp of varlist asia neoeuro lac africa {
    replace unregion = "`grp'" if `grp' == 1
}
tabulate unregion, missing
bysort unregion: tabulate unsubregion
* One labelled scatter per group plus the population-weighted quadratic fit
twoway ///
    (scatter pcstock3D_new lpcgdp2010s if unregion == "africa", mlabel(unsubregion) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter pcstock3D_new lpcgdp2010s if unregion == "asia", mlabel(unsubregion) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter pcstock3D_new lpcgdp2010s if unregion == "lac", mlabel(unsubregion) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter pcstock3D_new lpcgdp2010s if unregion == "neoeuro", mlabel(unsubregion) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)) ///
    (qfit pcstock3D_new lpcgdp2010s [w=totpop], lcolor(gs6) lpattern(dash) lwidth(medthick)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Volume per capita (cubic meters per inh.), margin(small)) ///
    graphregion(margin(tiny)) ///
    xtitle(Log per capita GDP (cst 2017 intl dol, PPP), margin(small)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    xlabel(7.5(0.5)11.5)
* The figure is then edited by hand; the exported png comes from that file
graph use "volpc_econdvt_20unsubregions.gph"
graph export "FinalFigures\scatter_unsubregion.png", replace width(2620) height(1908)

****************
*** FIGURE 6 ***
****************

* Figure 6 inputs: gap between the volume per capita predicted by the world
* regression line (quadratic in log income, population weighted) and the
* observed volume per capita.
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
generate lpcgdp2010s_sq = lpcgdp2010s*lpcgdp2010s
regress pcstock3D_new lpcgdp2010s lpcgdp2010s_sq [w=totpop], vce(robust)
predict pcstock3D_new_p, xb
* Visual check: the predictions should lie on the qfit curve
twoway ///
    (qfit pcstock3D_new lpcgdp2010s [w=totpop], lcolor(blue) lpattern(solid) lwidth(medthick)) ///
    (scatter pcstock3D_new_p lpcgdp2010s)
* Gap per capita (cubic m per capita): predicted minus observed
generate diff_pc = pcstock3D_new_p-pcstock3D_new
* Total gap (cubic m): per capita gap times population
summarize totpop, detail
summarize totpop if country_wb == "Croatia"
generate diff_total = diff_pc*totpop
* Keep only the variables needed for the Figure 6 charts and regional totals
keep country_wb incgroup2019 pcstock3D_new stock3D_new diff_pc diff_total totpop *region*
* We drop four city-states / small economies by name and every country with
* at most 5 million inhabitants (the threshold in the code is 5,000,000).
* Qatar is inspected below but not dropped by name; it is removed only if its
* totpop is at or below the threshold.
sum totpop, d
sum totpop if country_wb == "Macao SAR, China"
sum totpop if country_wb == "Hong Kong SAR, China"
sum totpop if country_wb == "Singapore"
sum totpop if country_wb == "Qatar"
sum totpop if country_wb == "United Arab Emirates"
drop if country_wb == "Macao SAR, China"
drop if country_wb == "Hong Kong SAR, China"
drop if country_wb == "Singapore"
drop if country_wb == "United Arab Emirates"
drop if totpop <= 5000000
* We rank countries by total gap (diff_total) and by per capita gap (diff_pc),
* largest first. The "_incpop" suffix in the rank names is kept for
* consistency with the Figure 7 code; no income/population growth is used here.
gsort- diff_total
gen rank_totvol_incpop = _n
gsort- diff_pc
gen rank_pcvol_incpop = _n
* Attach country names as value labels of the total-gap rank
labmask rank_totvol_incpop, values(country_wb)
tab rank_totvol_incpop
tab rank_totvol_incpop, nol
* String of the form "<rank>. <country>"
gen label_rank_totvol_incpop = string(rank_totvol_incpop)+". "+country_wb 
* Shorter country names so that the bar labels take less space in the figure
replace country_wb = "Dominican Rep" if country_wb == "Dominican Republic"
replace country_wb = "Bosnia"        if country_wb == "Bosnia and Herzegovina"
replace country_wb = "Egypt"         if country_wb == "Egypt, Arab Rep."
replace country_wb = "Russia"        if country_wb == "Russian Federation"
* Broad regional groups built from the UN subregions
generate asia    = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
generate neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
generate lac     = inlist(unsubregion, "Caribbean", "Central America", "South America")
generate africa  = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* unregion holds the group name; it stays empty for subregions in no group
generate unregion = ""
foreach grp of varlist asia neoeuro lac africa {
    replace unregion = "`grp'" if `grp' == 1
}
* Store the country-level gaps; each chart below starts from this file
save temp, replace
use temp, clear
* Panel 1: the 20 largest per capita gaps
* NOTE(review): the axis titles say "2019-2025" although these gaps are
* deviations from the current world line - confirm the intended wording.
graph hbar (mean) diff_pc if rank_pcvol_incpop <= 20, ///
    over(country_wb, sort(rank_pcvol_incpop) label(labsize(small))) nofill ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
    ytitle("Per capita volume need 2019-2025 (cubic m per cap)", margin(small))
graph export "FinalFigures\currentneeds1.png", replace width(2620) height(1908)
use temp, clear
* Panel 2: the 20 largest total gaps, expressed in billions
replace diff_total = diff_total/1000000000
graph hbar (mean) diff_total if rank_totvol_incpop <= 20, ///
    over(country_wb, sort(rank_totvol_incpop) label(labsize(small))) nofill ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
    ytitle("Total volume need 2019-2025 (billion cubic m)", margin(small)) ylabel(0(10)60)
graph export "FinalFigures\currentneeds2.png", replace width(2620) height(1908)
sort rank_totvol_incpop
* The per capita gaps are saved for another analysis in this do-file
use temp, clear
keep country_wb diff_pc totpop
sort country_wb
save diff_pc, replace

** REGIONAL NEEDS FOR DEVELOPING ECONOMIES **
* H flags high-income economies (incgroup2019 == "H"); results are displayed
* separately for H = 0 and H = 1.

* Per capita, for each UN region
use temp, clear
generate H = (incgroup2019 == "H")
summarize diff_pc, detail
* Countries with an excess relative to the world line are dropped
drop if diff_pc < 0
collapse (sum) diff_total totpop, by(unregion H)
generate diff_pc = diff_total/totpop
replace diff_pc = round(diff_pc,1)
gsort -H diff_pc

* Total needs, for each UN region
use temp, clear
generate H = (incgroup2019 == "H")
summarize diff_pc, detail
* Countries with an excess relative to the world line are dropped
drop if diff_pc < 0
replace diff_total = diff_total/1000000000
collapse (sum) diff_total, by(unregion H)
replace diff_total = round(diff_total,1)
gsort -H diff_total

* Total needs, for each UN subregion
use temp, clear
generate H = (incgroup2019 == "H")
summarize diff_pc, detail
* Countries with an excess relative to the world line are dropped
drop if diff_pc < 0
replace diff_total = diff_total/1000000000
collapse (sum) diff_total, by(unsubregion H)
replace diff_total = round(diff_total,1)
gsort -H diff_total

* Total needs, global level 
use temp, clear
* Excesses count as zero need
replace diff_total = 0 if diff_total < 0
collapse (sum) stock3D_new diff_total
* Need as a percentage of the existing stock
generate share = diff_total/stock3D_new*100
summarize share
* Carbon impact
generate carbonimpact = (61+39*(1+share/100))-100
summarize carbonimpact

****************
*** FIGURE 7 ***
****************

* Figure 7 inputs: predicted volume per capita in 2019 and 2025 from the
* baseline global relationship (quadratic in log income, population weighted).
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
generate lpcgdp2010s_sq = lpcgdp2010s*lpcgdp2010s
regress pcstock3D_new lpcgdp2010s lpcgdp2010s_sq [w=totpop], vce(robust)
* The three coefficients are stored as variables for the predictions below
generate coef_con = _b[_cons]
generate coef_lin = _b[lpcgdp2010s]
generate coef_sq  = _b[lpcgdp2010s_sq]
* Only countries with a forecasted growth rate are kept
keep if avggr1921 != .
count
* 161
summarize avggr1921, detail
* Growth factor converted to a percentage
replace avggr1921 = (avggr1921-1)*100
summarize avggr1921, detail
tabulate country_wb if avggr1921 >= 19
* Guyana is excluded: its predicted growth rate of 20% is well above other countries
drop if country_wb == "Guyana"
* Predicted per capita volume in 2019
generate lpcgdp2019_sq = lpcgdp2019*lpcgdp2019
generate pcvol19 = coef_con+coef_lin*lpcgdp2019+coef_sq*lpcgdp2019_sq
* Needed per capita volume in 2025 based on income growth only
generate lpcgdp2025_inc_sq = lpcgdp2025_inc*lpcgdp2025_inc
generate pcvol25_inc = coef_con+coef_lin*lpcgdp2025_inc+coef_sq*lpcgdp2025_inc_sq
* Needed per capita volume in 2025 based on income growth + population growth
generate lpcgdp2025_incpop_sq = lpcgdp2025_incpop*lpcgdp2025_incpop
generate pcvol25_incpop = coef_con+coef_lin*lpcgdp2025_incpop+coef_sq*lpcgdp2025_incpop_sq

** VERSION WITH PCGDP GROWTH ONLY 2019-2025 **
* Per capita need 2019-2025, then total need (per capita need x population)
generate pcvoldiff1925_inc = pcvol25_inc-pcvol19
generate voldiff1925_inc = pcvoldiff1925_inc*totpop

** VERSION WITH PCGDP GROWTH AND POP GROWTH 2019-2025 **
* Per capita need 2019-2025
generate pcvoldiff1925_incpop = pcvol25_incpop-pcvol19
* Population change 2019-2025
generate diffpop2519 = totpop2025-totpop
* Total need = per capita increase for the existing population
*            + new per capita volume for the additional population
generate voldiff1925_incpop = pcvoldiff1925_incpop*totpop + (totpop2025-totpop)*pcvol25_incpop
* Existing stock expressed in billions
gsort -stock3D_new
order country_wb stock3D_new
replace stock3D_new = stock3D_new / 1000000000
* Income-only versus income + population versions
correlate pcvoldiff1925_incpop pcvoldiff1925_inc
* 0.84
correlate voldiff1925_incpop voldiff1925_inc
* 1.00

* Variables kept for the figure and the regional totals
keep country_wb unsubregion incgroup* stock3D_new pcvoldiff1925_incpop pcvoldiff1925_inc voldiff1925_incpop voldiff1925_inc totpop wbregion totpop2025
* Total population change
generate chgpop1925 = totpop2025 - totpop

* Per capita needs are rounded to 0.1 and labelled
foreach v in incpop inc {
    replace pcvoldiff1925_`v' = round(pcvoldiff1925_`v',0.1)
}
label variable pcvoldiff1925_incpop "Per capita volume change 2019-2025, inc pop gr (cubic m)"
label variable pcvoldiff1925_inc "Per capita volume change 2019-2025, inc gr (cubic m)"
summarize pcvoldiff1925*
* Total needs are expressed in billions, rounded to 1 and labelled
foreach v in incpop inc {
    replace voldiff1925_`v' = voldiff1925_`v'/1000000000
    replace voldiff1925_`v' = round(voldiff1925_`v',1)
}
label variable voldiff1925_incpop "Volume change 2019-2025, inc pop gr (bil cubic m)"
label variable voldiff1925_inc "Volume change 2019-2025, inc gr (bil cubic m)"
summarize voldiff1925*

* Ranks based on the "incpop" needs (income and population growth), largest
* first: by total need and by per capita need
gsort -voldiff1925_incpop
generate rank_totvol_incpop = _n
gsort -pcvoldiff1925_incpop
generate rank_pcvol_incpop = _n
labmask rank_totvol_incpop, values(country_wb)
tabulate rank_totvol_incpop
order rank_pcvol_incpop
tabulate rank_totvol_incpop, nolabel
generate label_rank_totvol_incpop = string(rank_totvol_incpop)+". "+country_wb
* Shorter country names so that the bar labels take less space in the figure
replace country_wb = "Dominican Rep" if country_wb == "Dominican Republic"
replace country_wb = "Bosnia"        if country_wb == "Bosnia and Herzegovina"
replace country_wb = "Egypt"         if country_wb == "Egypt, Arab Rep."
replace country_wb = "Russia"        if country_wb == "Russian Federation"
* Broad regional groups built from the UN subregions
generate asia    = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
generate neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
generate lac     = inlist(unsubregion, "Caribbean", "Central America", "South America")
generate africa  = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
generate unregion = ""
foreach grp of varlist asia neoeuro lac africa {
    replace unregion = "`grp'" if `grp' == 1
}
* Figure - Panel(a): 20 largest per capita needs; the exported png comes
* from the stored .gph file loaded right after the draft chart
graph hbar (mean) pcvoldiff1925_incpop if rank_pcvol_incpop <= 20, ///
    over(country_wb, sort(rank_pcvol_incpop) label(labsize(small))) nofill ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
    ytitle("Per capita volume need 2019-2025 (cubic m per cap)", margin(small))
graph use "futurepcneeds_sr.gph"
graph export "FinalFigures\futureneeds2025_1.png", replace width(2620) height(1908)
* Figure - Panel(b): 20 largest total needs
graph hbar (mean) voldiff1925_incpop if rank_totvol_incpop <= 20, ///
    over(country_wb, sort(rank_totvol_incpop) label(labsize(small))) nofill ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
    ytitle("Total volume need 2019-2025 (billion cubic m)", margin(small)) ylabel(0(25)125)
graph use "futuretotalneeds_sr.gph"
graph export "FinalFigures\futureneeds2025_2.png", replace width(2620) height(1908)
* Generic names for the regional totals computed from temp
rename pcvoldiff1925_incpop diff_pc
rename voldiff1925_incpop diff_total
keep country_wb *region incgroup2019 diff* stock3D_new
save temp, replace

** REGIONAL NEEDS FOR DEVELOPING ECONOMIES **
* H flags high-income economies (incgroup2019 == "H")

* Total needs, for each UN region
use temp, clear
generate H = (incgroup2019 == "H")
gsort -diff_total
collapse (sum) diff_total, by(unregion H)
replace diff_total = round(diff_total,1)
gsort -H diff_total

* Total needs, global level 
use temp, clear
collapse (sum) stock3D_new diff_total
* Need as a percentage of the existing stock
generate share = diff_total/stock3D_new*100
summarize share
* Carbon impact
generate carbonimpact = (61+39*(1+share/100))-100
summarize carbonimpact

****************
*** FIGURE 8 ***
****************

* Figure 8 inputs: construction needs 2019-2050 implied by population growth
* alone, holding each country's current volume per capita constant.
* We start with the country level data 
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
* future construction needs = stock per capita * pop increase
* Total need = increase per capita x increase in capita
gen diffpop5019 = totpop2050-totpop
gsort- diffpop5019
* Stock per capita
sum pcstock3D_new, d
* cubic m per capita, from 19 to 874
gen totalneed  = diffpop5019*pcstock3D_new
* In cubic m
* We express in billions. 
gsort- stock3D_new
order country_wb stock3D_new
* Fix: the divisor is 1e9 (billions). It was 100000000 (1e8), which inflated
* totalneed tenfold relative to stock3D_new (divided by 1e9 below) and hence
* pctgrowth_vol and the global share computed from these two variables.
* NOTE(review): the Figure 8 bar chart uses ylabel(0(25)250); re-check that
* axis range against the corrected values.
replace totalneed = totalneed / 1000000000
drop if totalneed == .
* We replace the negative values by 0
replace totalneed = 0 if totalneed < 0
replace totalneed = round(totalneed,0.1)
sum totalneed, d
corr diffpop5019 totalneed
* correlation below 1 (0.79)
* In growth rates:
gen pctgrowth_pop = (diffpop5019)/totpop*100
sum pctgrowth_pop, d
* mean = 33.0%
replace stock3D_new = stock3D_new / 1000000000
gen pctgrowth_vol = (totalneed)/stock3D_new*100
sum pctgrowth_vol, d
* mean = 35%
corr pctgrowth_*
* 0.98
keep country_wb diffpop5019 stock3D_new incgroup* totalneed unsubregion wbregion
* Rank of each country by total need (largest first), with country names
* attached as value labels
gsort -totalneed
generate rank_totalneed = _n
labmask rank_totalneed, values(country_wb)
gsort -rank_totalneed
tabulate rank_totalneed
tabulate rank_totalneed, nolabel
generate label_rank_totalneed = string(rank_totalneed)+". "+country_wb
* Shorter country names so that the bar labels take less space in the figure
replace country_wb = "Dominican Rep" if country_wb == "Dominican Republic"
replace country_wb = "Bosnia"        if country_wb == "Bosnia and Herzegovina"
replace country_wb = "Egypt"         if country_wb == "Egypt, Arab Rep."
replace country_wb = "Russia"        if country_wb == "Russian Federation"
* Broad regional groups built from the UN subregions
generate asia    = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
generate neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
generate lac     = inlist(unsubregion, "Caribbean", "Central America", "South America")
generate africa  = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
generate unregion = ""
foreach grp of varlist asia neoeuro lac africa {
    replace unregion = "`grp'" if `grp' == 1
}
summarize totalneed, detail
* Bar chart of the 20 largest total needs; the exported png comes from the
* stored .gph file loaded right after the draft chart
sort label_rank_totalneed
graph hbar (mean) totalneed if rank_totalneed <= 20, ///
    over(country_wb, sort(rank_totalneed) label(labsize(small))) nofill ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white) margin(small)) ///
    ytitle("Total volume need 2019-2050 (billion cubic m)", margin(small)) ylabel(0(25)250)
graph use "futuretotalneeds_lr.gph"
graph export "FinalFigures\futureneeds2050.png", replace width(2620) height(1908)
* Generic name for the regional totals computed from temp
rename totalneed diff_total
keep country_wb diffpop5019 *region incgroup2019 diff* stock3D_new
save temp, replace

** REGIONAL NEEDS FOR DEVELOPING ECONOMIES **
* H flags high-income economies (incgroup2019 == "H")

* Total needs and population change, for each UN region
use temp, clear
generate H = (incgroup2019 == "H")
gsort -diff_total
collapse (sum) diff_total diffpop5019, by(unregion H)
replace diff_total = round(diff_total,1)
gsort -H diff_total

* Total needs, global level 
use temp, clear
collapse (sum) stock3D_new diff_total
* Need as a percentage of the existing stock
generate share = diff_total/stock3D_new*100
summarize share
* Carbon impact
generate carbonimpact = (61+39*(1+share/100))-100
summarize carbonimpact

****************
*** FIGURE 9 ***
****************
*Total Building Area per Capita and Economic Development, 2010s

* Country-level scatter of built-up area per capita against log income:
* population-weighted markers, labels for the 50 most populous countries and
* a population-weighted quadratic fit.
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
twoway ///
    (scatter pcstock2D lpcgdp2010s [w=totpop], msymbol(circle_hollow) mcolor(ltblue)) ///
    (scatter pcstock2D lpcgdp2010s if rankpop <= 50, mlabel(ccode) mlabposition(9) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)) ///
    (qfit pcstock2D lpcgdp2010s [w=totpop], lcolor(gs6) lpattern(dash) lwidth(medthick)), ///
    legend(off) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    ytitle(Built-Up Area per capita (sq m per inh.), margin(medsmall)) ///
    xtitle(Log per capita GDP (cst 2017 intl dol, PPP), margin(small)) ///
    graphregion(margin(tiny)) ylabel(0(50)150)

* Country-level totals over all cities: summed stock2D and summed city
* population P15, plus stock2D per city inhabitant (the built-up area measure
* of Figure 9). Saved to temp_city, sorted by country_wb.
use city3Dfull, clear
collapse (sum) stock2D P15, by(country_wb)
generate stockpc_city = stock2D/P15
summarize stockpc_city
rename stock2D stock_city
rename P15     totpop_city
keep country_wb stock_city stockpc_city totpop_city
sort country_wb
save temp_city, replace

* Country-level totals restricted to mega-cities (P15 >= 5 million): summed
* stock2D, summed population and stock2D per mega-city inhabitant.
* Saved to temp_megacity, sorted by country_wb.
use city3Dfull, clear
keep if P15 >= 5000000
collapse (sum) stock2D P15, by(country_wb)
generate stockpc_megacity = stock2D/P15
summarize stockpc_megacity
* NOTE(review): the note "193 cubic m per capita" that stood here repeats the
* Figure 4 volume value and looks like a stale copy; stock2D is used here.
rename stock2D stock_megacity
rename P15     totpop_megacity
keep country_wb stock_megacity stockpc_megacity totpop_megacity
sort country_wb
save temp_megacity, replace

* We obtain the mean built-up area (stock2D) per capita for all rural areas & small towns in a country
* Rural + small-town figures are the residual: country totals minus the city
* totals stored in temp_city. Result is saved to temp_rural.
use country3Dfull, clear
collapse (sum) stock2D totpop2015, by(country_wb) 
ren stock2D stock_cntry
ren totpop2015 totpop_cntry
* We do so by removing the total for urban areas from the total using all localities (i.e., the data at the country level).
sort country_wb
merge country_wb using temp_city
tab _m
drop _m
* Countries absent from temp_city have no city rows: their city totals are 0.
replace stock_city = 0 if stock_city == .
* Fix: the city population must be zero-filled as well. Previously only the
* stock was, so totpop_rural (and hence stockpc_rural) stayed missing for
* countries without cities even though their rural stock was defined.
replace totpop_city = 0 if totpop_city == .
gen stock_rural = stock_cntry-stock_city
sum stock_rural, d
replace stock_rural = 0 if stock_rural == .
gen totpop_rural = totpop_cntry-totpop_city
sum totpop_rural, d
* Negative residual populations are listed, then floored at 0
* (the per capita value is then missing because of the division by 0).
tab country_wb if totpop_rural < 0
replace totpop_rural = 0 if totpop_rural < 0
gen stockpc_rural = stock_rural/totpop_rural
sum stockpc_rural
* NOTE(review): an earlier note here read "240 cubic m per capita", which
* repeats the Figure 4 volume value; this block works with stock2D.
keep country_wb stock_rural stockpc_rural totpop_rural
sort country_wb
save temp_rural, replace

* Attach the city, rural and mega-city aggregates to the country-level sample.
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
correlate lpcgdp*
* Same merge routine for each of the three temporary files, in this order
foreach aggfile in temp_city temp_rural temp_megacity {
    sort country_wb
    merge country_wb using `aggfile'
    tabulate _merge
    drop _merge
}

* Figure 9 draft: quadratic fits of built-up area per capita on log income for
* the whole country, mega-cities, all cities and rural areas + towns, over a
* population-weighted scatter with labels for the 50 most populous countries.
* Fixes relative to the earlier version:
*  - lcolor(lightblue) is not a Stata named colour; ltblue (used elsewhere in
*    this file) replaces it;
*  - the y-axis title described volume in cubic meters; it now matches the
*    built-up area title of the country-level scatter for this figure;
*  - the mega-city fit is weighted by mega-city population (totpop_megacity)
*    instead of the population of all cities (totpop_city);
*  - the legend labels follow the plot order (1 overall, 2 mega-cities,
*    3 cities, 4 rural + towns). The legend itself remains switched off.
foreach X in pcstock2D {
    twoway ///
        (qfit `X' lpcgdp2010s [w=totpop], lcolor(black) lpattern(solid) lwidth(medthick)) ///
        (qfit stockpc_megacity lpcgdp2010s [w=totpop_megacity], lcolor(blue) lpattern(longdash) lwidth(medthick)) ///
        (qfit stockpc_city lpcgdp2010s [w=totpop_city], lcolor(ltblue) lpattern(longdash) lwidth(medthick)) ///
        (qfit stockpc_rural lpcgdp2010s [w=totpop_rural], lcolor(green) lpattern(dash) lwidth(medthick)) ///
        (scatter `X' lpcgdp2010s [w=totpop], msymbol(circle_hollow) mcolor(ltblue)) ///
        (scatter `X' lpcgdp2010s if rankpop <= 50, mlabel(ccode) mlabposition(9) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)), ///
        legend(off) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
        ytitle(Built-Up Area per capita (sq m per inh.), margin(medsmall)) ///
        xtitle(Log per capita GDP (cst 2017 intl dol, PPP), margin(small)) ///
        graphregion(margin(tiny)) ///
        legend(order(1 "Overall" 2 "Megacities" 3 "Cities" 4 "Rural+Towns") row(1)) ylabel(0(50)150)
}
* We have to create in the same style as the main figure *
* The main (volume) figure is opened first, then the manually styled
* built-up figure, which is the one exported.
graph use "volpc_econdvt_new.gph"
graph use "builtup_econdvt_new.gph"
graph export "FinalFigures\builtup_econdvt_new.png", replace width(2620) height(1908)

*****************
*** FIGURE 10 ***
*****************
* Average Building Height and Economic Development, 2010s.

use country3Dfull, clear
* Keep the analysis sample with non-missing log per capita GDP.
drop if sample != 1 | lpcgdp2010s == .
count
* 204
sum avght_new_div, d
* We do not show HKG, MAC, SGP. Otherwise, the figure is impossible to read.
* The scatter layers are restricted to heights of at most 15; the quadratic fit uses all observations.
twoway (scatter avght_new_div lpcgdp2010s [w=totpop] if avght_new_div <= 15, msymbol(circle_hollow) mcolor(ltblue)) ///
    (scatter avght_new_div lpcgdp2010s if (rankpop <= 50) & avght_new_div <= 15, mlabel(ccode) mlabposition(9) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)) ///
    (qfit avght_new_div lpcgdp2010s [w=totpop], lcolor(gs6) lpattern(dash) lwidth(medthick)), ///
    legend(off) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    ytitle(Average Building Height (m), margin(medsmall)) ///
    xtitle(Log per capita GDP (cst 2017 intl dol), margin(small)) graphregion(margin(tiny)) ylabel(5(5)30)

* Country-level totals over all cities, and the implied average building height
* (total volume divided by total built-up area).
use city3Dfull, clear
collapse (sum) stock3D_new stock2D totpop_city = P15, by(country_wb)
gen avght_new_div_city = stock3D_new/stock2D
* Suffix the totals so they can be merged next to the country-level variables.
foreach v in stock3D_new stock2D {
    rename `v' `v'_city
}
keep country_wb *_city
sort country_wb
save temp_city, replace

* Same totals and average building height, restricted to mega-cities (P15 of 5 million or more).
use city3Dfull, clear
drop if P15 < 5000000
collapse (sum) stock3D_new stock2D totpop_megacity = P15, by(country_wb)
gen avght_new_div_megacity = stock3D_new/stock2D
* Suffix the totals so they can be merged next to the country-level variables.
foreach v in stock3D_new stock2D {
    rename `v' `v'_megacity
}
keep country_wb *_megacity
sort country_wb
save temp_megacity, replace

* We obtain the average building height for all rural areas & small towns in a country
use country3Dfull, clear
collapse (sum) stock3D_new stock2D totpop2015, by(country_wb) 
ren stock3D_new stock3D_new_cntry
ren stock2D stock2D_cntry
ren totpop2015 totpop_cntry
* We do so by removing the total for urban areas from the total using all localities (i.e., the data at the country level).
sort country_wb
merge country_wb using temp_city
tab _m
drop _m
foreach X in stock3D_new stock2D {
* Countries without any city in temp_city have zero urban stock.
replace `X'_city = 0 if `X'_city == .
gen `X'_rural = `X'_cntry-`X'_city
sum `X'_rural, d
* Missing or negative remainders are set to zero.
replace `X'_rural = 0 if `X'_rural == .
replace `X'_rural = 0 if `X'_rural < 0
}
* Same convention for population: a country without any city has zero urban population.
* Without this line its rural population (the weight of the rural fit in the figure) was
* missing, although its rural stock had been set equal to the country total above.
replace totpop_city = 0 if totpop_city == .
gen totpop_rural = totpop_cntry-totpop_city
sum totpop_rural, d
* Variable name spelled out in full rather than relying on abbreviation (totpop_rura).
replace totpop_rural = 0 if totpop_rural < 0
* Average height = volume / area; a zero rural area yields a missing value.
gen avght_new_div_rural = stock3D_new_rural/stock2D_rural
keep country_wb *_rural
sort country_wb
save temp_rural, replace

* We add the estimated city, rural and megacity averages to the country-level averages.
use country3Dfull, clear
keep if sample == 1 & lpcgdp2010s != .
count
* 204
corr lpcgdp*
* Old-style merges: the master data must be sorted on country_wb before each merge.
sort country_wb
merge country_wb using temp_city
tab _m
drop _m
sort country_wb
merge country_wb using temp_rural
tab _m
drop _m
sort country_wb
merge country_wb using temp_megacity
tab _m
drop _m

sum avght_new_div, d
tab country_wb if avght_new_div > 10.5 & avght_new_div != .
* We will add the following countries manually with an arrow: 
* HKG, KOR, MAC, SGP, TWN
* Missing values compare as larger than any number in Stata, so the drop is guarded
* in the same way as the -tab- above: only the tall outliers are removed.
drop if avght_new_div > 10.5 & avght_new_div != .

* Quadratic fits for the four series (overall, megacities, cities, rural+towns) plus the country scatter.
* Fixes relative to the previous version:
*  - the y-axis title was copied from the volume figure; this figure plots average building height;
*  - the legend order listed three labels for four fitted lines, so "Cities" pointed at the megacity line.
* NOTE(review): the megacity fit is weighted by totpop_city although totpop_megacity is available
* from temp_megacity - confirm which weight is intended.
foreach X in avght_new_div {
twoway (qfit `X' lpcgdp2010s [w=totpop], lcolor(black) lpattern(solid) lwidth(medthick)) ///
    (qfit `X'_megacity lpcgdp2010s [w=totpop_city], lcolor(blue) lpattern(longdash) lwidth(medthick)) ///
    (qfit `X'_city lpcgdp2010s [w=totpop_city], lcolor(gs8) lpattern(longdash) lwidth(medthick)) ///
    (qfit `X'_rural lpcgdp2010s [w=totpop_rural], lcolor(green) lpattern(dash) lwidth(medthick)) ///
    (scatter `X' lpcgdp2010s [w=totpop], msymbol(circle_hollow) mcolor(ltblue)) ///
    (scatter `X' lpcgdp2010s if rankpop <= 50, mlabel(ccode) mlabposition(9) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)), ///
    legend(off) graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    ytitle(Average Building Height (m), margin(medsmall)) ///
    xtitle(Log per capita GDP (cst 2017 intl dol, PPP), margin(small)) graphregion(margin(tiny)) ///
    legend(order(1 "Overall" 2 "Megacities" 3 "Cities" 4 "Rural+Towns") row(1)) ylabel(2(2)10)
}
* The exported figure is the manually edited graph file, styled like the main figure.
* (The former -graph use "volpc_econdvt_new.gph"- was a copy-paste leftover: it loaded a
* different figure that was immediately replaced by the next -graph use-.)
graph use "avgbdgheight_econdvt_new.gph"
graph export "FinalFigures\avgbdgheight_econdvt_new.png", replace width(2620) height(1908)

*****************
*** FIGURE 11 ***
*****************
* Total Building Volume Per Capita and Economic Development, 2010s

use country3Dfull, clear
* Keep the analysis sample with non-missing log per capita GDP.
drop if sample != 1 | lpcgdp2010s == .
count
* 204
* Three population-weighted quadratic fits against log per capita GDP, one per volume measure.
twoway (qfit pcstock3D_2D lpcgdp2010s [w=totpop], lcolor(midblue) lpattern(shortdash) lwidth(thick)) ///
    (qfit pcstock3D_old lpcgdp2010s [w=totpop], lcolor(blue) lpattern(longdash) lwidth(thick)) ///
    (qfit pcstock3D_new lpcgdp2010s [w=totpop], lcolor(dknavy) lwidth(thick)), ///
    legend(order(1 "Outward" 2 "Total Excl. High-Rises (HRs)" 3 "Incl. HRs") row(1)) ///
    ytitle(Volume per capita (cubic meters per inh.), margin(medium)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    xtitle(Log per capita GDP (cst 2017 intl dol, PPP), margin(small)) graphregion(margin(tiny)) ylabel(100(100)800)
* The exported figure is the manually modified graph file.
graph use "decomposition.gph"
graph export "FinalFigures\decomposition.png", replace width(2620) height(1908)

*****************
*** FIGURE 12 ***
*****************

use country3Dfull, clear
* Keep the analysis sample with non-missing log per capita GDP.
drop if sample != 1 | lpcgdp2010s == .
count
* 204
corr lpcgdp*

* Step 1: residuals of each stock measure (levels first, then logs) relative to the global
* quadratic trend in log per capita GDP, unweighted (_resid_unw) and population-weighted (_resid_wt).
gen lpcgdp2010s_sq = lpcgdp2010s*lpcgdp2010s
foreach X in pcstock3D_new pcstock2D pcstock3D_no2D pcstock3D_LRno2D pcstock3D_HR lpcstock3D_new lpcstock2D lpcstock3D_no2D lpcstock3D_LRno2D lpcstock3D_HR {
    regress `X' lpcgdp2010s lpcgdp2010s_sq
    predict `X'_resid_unw, residuals
    regress `X' lpcgdp2010s lpcgdp2010s_sq [w=totpop]
    predict `X'_resid_wt, residuals
}

* Step 2: compare the residuals with the overall gaps stored in diff_pc.
sort country_wb
merge country_wb using diff_pc
tab _m
drop _m
* diff_pc = distance to the line
* negative value means excess
* whereas for lpcstock3D_new_resid_wt, positive value means excess
corr diff_pc lpcstock3D_new_resid_wt
corr diff_pc lpcstock3D_new_resid_wt [w=totpop]
* ok, above 0.8
foreach X in lpcstock3D_new lpcstock2D lpcstock3D_no2D lpcstock3D_LRno2D lpcstock3D_HR {
    desc `X'_resid_wt, f
    corr diff_pc `X'_resid_wt
    corr diff_pc `X'_resid_wt [w=totpop]
}
* Strong correlation with 2D, upward, LR
* Weak correlation HR
foreach X in lpcstock2D lpcstock3D_no2D lpcstock3D_LRno2D lpcstock3D_HR {
    desc `X'_resid_wt, f
    corr lpcstock3D_new_resid_wt `X'_resid_wt
    corr lpcstock3D_new_resid_wt `X'_resid_wt [w=totpop]
}
* Stronger correlation with 2D, upward, LR
* Weak correlation HR

* Correlation of the residuals for upward and outward
corr lpcstock2D_resid_wt lpcstock3D_no2D_resid_wt [w=totpop]
* 0.7 (countries with good heights have good areas)

* Step 3: specialization measures.
* proht: residual of the upward residual on the outward residual (heights vs area).
regress lpcstock3D_no2D_resid_wt lpcstock2D_resid_wt [w=totpop]
predict proht, residuals
gsort -proht
* proHR: specialization for high-rises vs low-rises.
* NOTE(review): the dependent variable here is the raw lpcstock3D_HR, whereas the proht
* regression above uses a *_resid_wt variable on the left-hand side - confirm this is intended.
regress lpcstock3D_HR lpcstock3D_LRno2D_resid_wt [w=totpop]
predict proHR, residuals
gsort -proHR
* Before we create the figure, we save the classification residuals; they are reused for the subregional figure.
save classification_temp, replace

* Selected figure: the 40 most populous countries, with three outliers shown by arrows.
* (rankpop <= 40 implies rankpop <= 50, so a single restriction is enough; after it,
* every remaining observation satisfies rankpop <= 40.)
keep if rankpop <= 40
sort rankpop
* First pass, including the outliers.
twoway (scatter proHR proht [w=totpop], msymbol(circle_hollow) mcolor(ltblue)) ///
    (scatter proHR proht, mlabel(ccode) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)), ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) legend(off) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
* Second pass without the three outliers and with fixed axis ranges.
drop if inlist(ccode, "SDN", "KOR", "COD")
twoway (scatter proHR proht [w=totpop], msymbol(circle_hollow) mcolor(ltblue)) ///
    (scatter proHR proht, mlabel(ccode) msymbol(point) msize(tiny) mcolor(white) mlabcolor(blue) mlabsize(tiny)), ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) legend(off) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) xlabel(-0.5(0.25)0.5) ylabel(-3(1)3)
* The exported figure is the saved graph file.
graph use "classif_cntries_area_heights.gph"
graph export "FinalFigures\classif_cntries_area_heights.png", replace width(2620) height(1908)

* Country-level specialization residuals, saved with a _cntry_agg suffix for the city analysis below.
use classification_temp, clear
keep country_wb ccode proHR proht totpop
foreach v in totpop proHR proht {
    rename `v' `v'_cntry_agg
}
sort country_wb
save country_residuals, replace

*****************
*** FIGURE 13 ***
*****************

* We use the temporary file that we just created for Figure 12 above.
use classification_temp, clear
* Population-weighted subregional averages of the two specialization measures.
collapse (mean) proHR proht [w=totpop], by(unsubregion)
* We add the total population of each subregion.
sort unsubregion
merge unsubregion using totpopsubregion
tab _m
drop _m
* Main groupings used in the figure (0/1 indicators).
* NOTE(review): "North America" is matched literally; confirm it is the spelling used in unsubregion.
gen asia = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
gen neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
gen lac = inlist(unsubregion, "Caribbean", "Central America", "South America")
gen africa = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* One string label per grouping; subregions outside all groupings keep an empty label.
gen unregion = ""
replace unregion = "asia" if asia == 1
replace unregion = "neoeuro" if neoeuro == 1
replace unregion = "lac" if lac == 1
replace unregion = "africa" if africa == 1
tab unregion, m
bysort unregion: tab unsubregion
* We create the figure
twoway (scatter proHR proht if unregion == "africa", mlabel(unsubregion) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter proHR proht if unregion == "asia", mlabel(unsubregion) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter proHR proht if unregion == "lac", mlabel(unsubregion) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter proHR proht if unregion == "neoeuro", mlabel(unsubregion) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ylabel(-5(1)3) xlabel(-0.5(0.5)0.75)
* We modify manually and export
graph use "classif_unsubregions_area_ht.gph"
graph export "FinalFigures\reg_classification.png", replace width(2620) height(1908)
* New figure with one linear fit per grouping
twoway (lfit proHR proht if unregion == "africa", lcolor(pink) lwidth(thick) lpattern(solid)) ///
    (lfit proHR proht if unregion == "asia", lcolor(green) lwidth(thick) lpattern(dash)) ///
    (lfit proHR proht if unregion == "lac", lcolor(red) lwidth(thick) lpattern(shortdash)) ///
    (lfit proHR proht if unregion == "neoeuro", lcolor(blue) lwidth(thick) lpattern(longdash)) ///
    (scatter proHR proht if unregion == "africa", mlabel(unsubregion) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter proHR proht if unregion == "asia", mlabel(unsubregion) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter proHR proht if unregion == "lac", mlabel(unsubregion) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter proHR proht if unregion == "neoeuro", mlabel(unsubregion) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ylabel(-3(1)2) xlabel(-0.5(0.5)0.5)
graph use "reg_classification_lines.gph"
graph export "FinalFigures\reg_classification_lines.png", replace width(2620) height(1908)

****************************
*** FIGURE 14, FIGURE 15 ***
****************************

* City-level residuals and specialization measures, saved in tempcities.
use city3Dfull, clear
count
* 12,831 
* We create the log pc GDP variable. 
* Divided by pop in inh
*gen lpcGDP2015 = log(GDP_2015/1000)/log(P15)

* Residuals of each stock measure (levels first, then logs) relative to a quadratic in
* lpcGDP2015, unweighted (_resid_unw) and weighted by totpop (_resid_wt).
gen lpcGDP2015_sq = lpcGDP2015*lpcGDP2015
foreach X in pcstock2D pcstock3D_no2D pcstock3D_LRno2D pcstock3D_HR lpcstock2D lpcstock3D_no2D lpcstock3D_LRno2D lpcstock3D_HR {
    regress `X' lpcGDP2015 lpcGDP2015_sq
    predict `X'_resid_unw, residuals
    regress `X' lpcGDP2015 lpcGDP2015_sq [w=totpop]
    predict `X'_resid_wt, residuals
}

* Specialization measures
* ... heights vs area
regress lpcstock3D_no2D_resid_wt lpcstock2D_resid_wt [w=totpop]
predict proht, residuals
gsort -proht
* ... high-rises vs low-rises
* NOTE(review): the dependent variable here is the raw lpcstock3D_HR, whereas the proht
* regression above uses a *_resid_wt variable on the left-hand side - confirm this is intended.
regress lpcstock3D_HR lpcstock3D_LRno2D_resid_wt [w=totpop]
predict proHR, residuals
gsort -proHR

* Population rank of each city within its country in 2015 (1 = largest P15).
gsort ccode -P15
by ccode, sort: gen rankcity = _n
order rankcity

* We now add the country-level measures
sort country_wb
merge country_wb using country_residuals
tab _m
drop _m
* Country code wrapped in bars, used as marker label for the country-level point.
gen ccode2 = "|"+ccode+"|"
* 1 for the 15 largest cities of a country, 0 otherwise, missing when the rank is missing.
gen top15 = (rankcity <= 15) if rankcity != .
* We save this data set for later. 
save tempcities, replace

***** FIGURE 14 *****

*** USA, CHINA, INDIA, BRAZIL ***
* For each country, in this order: plot its 15 largest cities (hollow circles plus city-name
* labels) together with the country-level point (diamond, placed on the rank-1 city row),
* then load the manually modified graph file and export it.
foreach X in USA CHN IND BRA {
    twoway (scatter proHR proht if ccode == "`X'" & rankcity <= 15, msymbol(circle_hollow) msize(medium) mcolor(black)) ///
        (scatter proHR proht if ccode == "`X'" & rankcity <= 15, mlabel(cityname) msymbol(point) msize(tiny) mcolor(white) mlabcolor(black) mlabsize(small)) ///
        || (scatter proHR_cntry_agg proht_cntry_agg if ccode == "`X'" & rankcity == 1, mlabel(ccode2) mcolor(black) msymbol(diamond_hollow) mlabsize(medlarge) mlabcolor(black)) ///
        ||, ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
        xline(0, lpattern(longdash) lcolor(gs8)) yline(0, lpattern(longdash) lcolor(gs8)) legend(off) ///
        graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
    * We modify manually
    graph use "primary_`X'.gph"
    graph export "FinalFigures\primary_`X'.png", replace width(2620) height(1908)
}

*** SOUTH AFRICA, NIGERIA ***
* For ZAF and NGA, among the top 15 cities, some cities have no high-rises.
* For these, they have a value towards low-rises.
* We thus use the lowest proHR value observed among the country's top 15 cities.
* The minimum is now taken from the stored result r(min) of the -summarize- just above the
* -replace-, instead of a number copied by hand from the log, so it stays correct if the
* underlying data change.
* Values obtained with the original data: ZAF -1.686044, NGA -13.05612.
foreach X in ZAF NGA {
    sum proHR proht if ccode == "`X'"
    sum proHR if ccode == "`X'" & rankcity <= 15
    replace proHR = r(min) if proHR == . & ccode == "`X'" & rankcity <= 15
    * Top-15 cities (circles plus labels) and the country-level point (diamond).
    twoway (scatter proHR proht if ccode == "`X'" & rankcity <= 15, msymbol(circle_hollow) msize(medium) mcolor(black)) ///
        (scatter proHR proht if ccode == "`X'" & rankcity <= 15, mlabel(cityname) msymbol(point) msize(tiny) mcolor(white) mlabcolor(black) mlabsize(small)) ///
        || (scatter proHR_cntry_agg proht_cntry_agg if ccode == "`X'" & rankcity == 1, mlabel(ccode2) mcolor(black) msymbol(diamond_hollow) mlabsize(medlarge) mlabcolor(black)) ///
        ||, ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
        xline(0, lpattern(longdash) lcolor(gs8)) yline(0, lpattern(longdash) lcolor(gs8)) legend(off) ///
        graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
    * We modify manually
    graph use "primary_`X'.gph"
    graph export "FinalFigures\primary_`X'.png", replace width(2620) height(1908)
}

***** FIGURE 15 *****

* This section continues with the city data currently in memory (there is no -use- here).
* Rank of each city by total volume (1 = largest stock3D_new).
gsort -stock3D_new
gen rank_vol15 = _n
* We keep cities among the top 30 in either volume or population
keep if rank_vol15 <= 30 | rank_pop15 <= 30
count
* 42
* Shorter display names for some cities.
replace cityname = "Sao Paulo" if cityname == "SÃ£o Paulo"
replace cityname = "Manila" if cityname == "Quezon City [Manila]"
replace cityname = "Delhi" if cityname == "Delhi [New Delhi]"
replace cityname = "Osaka" if cityname == "Osaka [Kyoto]"
replace cityname = "Taipei" if cityname == "New Taipei [Taipei]"
* Regional groupings (0/1 indicators).
* NOTE(review): "North America" is matched literally; confirm it is the spelling used in unsubregion.
gen asia = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
gen neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
gen lac = inlist(unsubregion, "Caribbean", "Central America", "South America")
gen africa = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* One scatter layer per grouping, labelled with the city name.
twoway (scatter proHR proht if africa == 1, mlabel(cityname) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter proHR proht if asia == 1, mlabel(cityname) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter proHR proht if lac == 1, mlabel(cityname) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter proHR proht if neoeuro == 1, mlabel(cityname) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
* We modify manually and export
graph use "cities_TOP30.gph"
graph export "FinalFigures\city_classification_top30.png", replace width(2620) height(1908)

************************************************************************************************************

************************
*** APPENDIX FIGURES ***
************************

* Appendix figures B.1 and B.2 are created in their own folder
* Appendix figures C.1 and C.2 are created in their own folder

***************************
*** APPENDIX FIGURE C.3 ***
***************************
* City Volume Per Capita and City Income Per Capita, 2010s

use city3Dfull, clear
count
* 12,831 
sum pcstock3D_new, d
* We exclude the outliers above the 99th percentile from the scatterplot.
* The cutoff is read from the stored result of the -summarize, detail- above instead of being
* copied by hand from the log (787.3749 with the original data), so it follows the data.
scalar p99_pcstock3D_new = r(p99)
* The scatter is trimmed at the cutoff; the fractional-polynomial fit (weighted by P15) uses all cities.
foreach X in pcstock3D_new {
twoway (scatter `X' lpcGDP2015 if pcstock3D_new <= scalar(p99_pcstock3D_new), msymbol(circle_hollow) msize(small) mcolor(ltblue)) ///
    (fpfit `X' lpcGDP2015 [w=P15], lcolor(dknavy) lwidth(medthick)), ///
    legend(off) ytitle(`X') ytitle(Volume per capita (cubic meters per inh.), margin(medsmall)) ///
    xtitle(Log(city real GDP) / Log(city population), margin(medsmall)) graphregion(margin(tiny)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
}
graph export "FinalFigures\city_pcstock3D_scatter_w.png", replace width(2620) height(1908)

*** FOR ALL ECONOMIES ***
* ELASTICITIES
* Each regression is a bivariate slope on lpcGDP2015 with robust standard errors.
* NOTE(review): in the "share high rises" lines below, the numerator is the coefficient of
* pcstock3D_LRno2D, not of pcstock3D_HR - confirm which share the label should describe.
* Slopes, unweighted:
regress pcstock3D_new lpcGDP2015, vce(robust)
regress pcstock3D_2D lpcGDP2015, vce(robust)
regress pcstock3D_no2D lpcGDP2015, vce(robust)
* 444*** 152*** 292***
* Share upward = 292.3912 / 444.2993*100 = 66% 
regress pcstock3D_no2D lpcGDP2015, vce(robust)
regress pcstock3D_LRno2D lpcGDP2015, vce(robust)
regress pcstock3D_HR lpcGDP2015, vce(robust)
* 292*** 281*** 12***
* Share high rises in upward = 280.7617 / 292.3912*100 = 96%
* Slopes, weighted:
regress pcstock3D_new lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_2D lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_no2D lpcGDP2015 [w=totpop], vce(robust)
* 283*** 74*** 209*** 
* Share upward = 209.1462 / 283.19383*100 = 74% 
regress pcstock3D_no2D lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_LRno2D lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_HR lpcGDP2015 [w=totpop], vce(robust)
* 209*** 184*** 25*** 
* Share high rises in upward = 184.495  / 209.1462 *100 = 88%
* We say between two thirds and three fourths for upward
* We say 90% for high-rises as part of upward

*** FOR DEVELOPING ECONOMIES ***
* Same regressions after removing the economies with incgroup2019 equal to "H".
drop if incgroup2019 == "H"
* ELASTICITIES
* Slopes, unweighted:
regress pcstock3D_new lpcGDP2015, vce(robust)
regress pcstock3D_2D lpcGDP2015, vce(robust)
regress pcstock3D_no2D lpcGDP2015, vce(robust)
* Share upward = 221.2418/333.0878*100 = 66% 
regress pcstock3D_no2D lpcGDP2015, vce(robust)
regress pcstock3D_LRno2D lpcGDP2015, vce(robust)
regress pcstock3D_HR lpcGDP2015, vce(robust)
* Share high rises in upward = 215.6538 / 221.2418*100 = 97%
* Slopes, weighted:
regress pcstock3D_new lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_2D lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_no2D lpcGDP2015 [w=totpop], vce(robust)
* Share upward = 161.1721 / 208.8724*100 = 77% 
regress pcstock3D_no2D lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_LRno2D lpcGDP2015 [w=totpop], vce(robust)
regress pcstock3D_HR lpcGDP2015 [w=totpop], vce(robust)
* Share high rises in upward = 145.2548    /  161.172 *100 =90%
* We obtain the same patterns as when considering all countries

***************************
*** APPENDIX FIGURE C.4 ***
***************************
* City Total Building Area Per Capita and City Income Per Capita, 2015

use city3Dfull, clear
count
* 12,831 
* We exclude the 99p outliers for the scatter (cutoff 115.4423);
* the fractional-polynomial fit (weighted by P15) uses all cities.
twoway (scatter pcstock2D lpcGDP2015 if pcstock2D <= 115.4423, msymbol(circle_hollow) msize(small) mcolor(ltblue)) ///
    (fpfit pcstock2D lpcGDP2015 [w=P15], lcolor(dknavy) lwidth(medthick)), ///
    legend(off) ytitle(pcstock2D) ytitle(Area per capita (sq meters per inh.), margin(medsmall)) ///
    xtitle(Log(city real GDP) / Log(city population), margin(medsmall)) graphregion(margin(tiny)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ylabel(0(20)100)
graph export "FinalFigures\city_pcstock2D_scatter_w.png", replace width(2620) height(1908)

***************************
*** APPENDIX FIGURE C.5 ***
***************************

* Figure a *
* City Per Capita Income and City Population Size, USA, 2015
use city3Dfull, clear
count
* 12,831 
* We create the log per capita GDP variable for this relationship.
gen pcGDP2015_v2 = GDP_2015/(P15/1000) 
* The log is taken of the variable created on the line above. The previous code wrote
* log(pcGDP2015), which only pointed to pcGDP2015_v2 through variable-name abbreviation
* and would silently pick up a different variable if one named pcGDP2015 existed.
gen lpcGDP2015_v2 = log(pcGDP2015_v2)
* We change Tijuana which is actually San Diego in the data. 
replace cityname = "San Diego" if cityname == "Tijuana"
* Quadratic fit (weighted by P15) and labelled scatter for U.S. cities, excluding Atlantic City.
twoway (qfit lpcGDP2015_v2 lP15 [w=P15] if cityname != "Atlantic City", lcolor(gs10) lpattern(longdash)) ///
    (scatter lpcGDP2015_v2 lP15 if cityname != "Atlantic City", mlabel(UC_NM_MN) mlabsize(tiny) msize(vsmall) msymbol(circle_hollow) mcolor(ltblue) mlabcolor(dknavy)) ///
    if CTR_MN_NM == "United States", ytitle(Log(City GDP / Pop) 2015, margin(medsmall)) legend(off) ///
    xlabel(11(1)16) xtitle(Log City Pop 2015, margin(medsmall)) graphregion(margin(tiny)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
* The exported figure is the saved graph file.
graph use "wrong_relation_city_us.gph"
graph export "FinalFigures\wrong_rela_pcgdp_us.png", replace width(2620) height(1908)

* Figure b *
* City Per Capita Income and City Population Size, USA, 2015

use city3Dfull, clear
count
* 12,831 
* This panel plots lpcGDP2015, which is read directly from the data. The two *_v2 variables
* that were generated here before were never used in this panel (and the second one relied
* on an abbreviated variable name), so they are no longer created.
* We change Tijuana which is actually San Diego in the data. 
replace cityname = "San Diego" if cityname == "Tijuana"
* Quadratic fit (weighted by P15) and labelled scatter for U.S. cities, excluding Atlantic City.
twoway (qfit lpcGDP2015 lP15 [w=P15] if cityname != "Atlantic City", lcolor(gs10) lpattern(longdash)) ///
    (scatter lpcGDP2015 lP15 if cityname != "Atlantic City", mlabel(UC_NM_MN) mlabsize(tiny) msize(vsmall) msymbol(circle_hollow) mcolor(ltblue) mlabcolor(dknavy)) ///
    if CTR_MN_NM == "United States", ytitle(Log(City GDP 2015 in Bn. USD) / Log(City Pop 2015), margin(medsmall)) legend(off) ///
    xlabel(11(1)15) ylabel(1(0.1)1.3) xtitle(Log City Pop 2015, margin(medsmall)) graphregion(margin(tiny)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
* We manually modify and export
graph use "rela_pcgdp_us.gph"
graph export "FinalFigures\rela_pcgdp_us.png", replace width(2620) height(1908)

***************************
*** APPENDIX FIGURE C.6 ***
***************************
* City Average Building Height and City Income Per capita, 2015.

use city3Dfull, clear
count
* 12,831 
sum avght_new_div, d
* We exclude the outliers for the scatter (heights above 20 are not shown);
* the fractional-polynomial fit (weighted by P15) uses all cities.
twoway (scatter avght_new_div lpcGDP2015 if avght_new_div <= 20, msymbol(circle_hollow) msize(small) mcolor(ltblue)) ///
    (fpfit avght_new_div lpcGDP2015 [w=P15], lcolor(dknavy) lwidth(medthick)), ///
    legend(off) ytitle(avght_new_div) ytitle(Average building height (m), margin(medsmall)) ///
    xtitle(Log(city real GDP) / Log(city population), margin(medsmall)) graphregion(margin(tiny)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ylabel(0(5)20)
graph export "FinalFigures\city_avght_new_div_scatter_w.png", replace width(2620) height(1908)

***************************
*** APPENDIX FIGURE C.7 ***
***************************
* Classification of UN subregions, primary cities only, 2019

* Starting point: the city data set saved in tempcities.
use tempcities, clear
* totpop = P15
* so weights = city pop 2015
tab top15, m
drop if top15 == .
* Weighted subregional averages, computed separately for top-15 and other cities.
collapse (mean) proHR proht [w=totpop], by(unsubregion top15)
sum proHR proht, d
* If the values are missing because they have no heights, we assign the lowest values.
replace proHR = -10.91208 if proHR == .
replace proht = -1.252601 if proht == .
* We add the total population of each subregion. 
sort unsubregion
merge unsubregion using totpopsubregion
tab _m
drop _m
* Regional groupings (0/1 indicators).
* NOTE(review): "North America" is matched literally; confirm it is the spelling used in unsubregion.
gen asia = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
gen neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
gen lac = inlist(unsubregion, "Caribbean", "Central America", "South America")
gen africa = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* One string label per grouping; subregions outside all groupings keep an empty label.
gen unregion = ""
replace unregion = "asia" if asia == 1
replace unregion = "neoeuro" if neoeuro == 1
replace unregion = "lac" if lac == 1
replace unregion = "africa" if africa == 1
* Only the averages over the top-15 cities are plotted here.
drop if top15 != 1
sum proHR proht
twoway (scatter proHR proht if unregion == "africa", mlabel(unsubregion) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter proHR proht if unregion == "asia", mlabel(unsubregion) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter proHR proht if unregion == "lac", mlabel(unsubregion) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter proHR proht if unregion == "neoeuro", mlabel(unsubregion) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ylabel(-3(1)2) xlabel(-0.5(0.5)0.5)
* The exported figure is the saved graph file.
graph use "cities_prim.gph"
graph export "FinalFigures\reg_class_prim.png", replace width(2620) height(1908)

***************************
*** APPENDIX FIGURE C.8 ***
***************************
* Classification of UN subregions, secondary cities only, 2019.

* We start with the city data set.
* We create the subregional average for the chosen cities.
use tempcities, clear
* totpop = P15
* so weights = city pop 2015
tab top15, m
drop if top15 == .
collapse (mean) proHR proht [w=totpop], by(unsubregion top15)
sum proHR proht, d
* If the values are missing because they have no heights, we assign the lowest values.
* NOTE(review): both constants are hard-coded; presumably they are the minima reported by
* the summarize just above -- confirm, and update them whenever tempcities changes.
replace proHR = -10.91208 if proHR == .
replace proht = -1.252601 if proht == .
* We add the total population of each subregion.
* The old-style merge syntax is used, hence the sort on the key beforehand.
sort unsubregion
merge unsubregion using totpopsubregion
* _merge is written out in full so these lines do not depend on variable abbreviation
* (they would fail under -set varabbrev off-).
tab _merge
drop _merge
* We create the groupings (0/1 indicators for four broad groups of UN subregions).
gen asia = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
gen neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
gen lac = inlist(unsubregion, "Caribbean", "Central America", "South America")
gen africa = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* unregion stores the name of the group a subregion belongs to ("" when it is in none).
gen unregion = ""
foreach X in asia neoeuro lac africa {
    replace unregion = "`X'" if `X' == 1
}
* Only the rows with top15 == 0 are kept for this figure.
keep if top15 == 0
sum proHR proht
* One scatter layer per regional group, labelled by subregion, with dashed reference lines at zero.
twoway ///
    (scatter proHR proht if unregion == "africa", mlabel(unsubregion) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter proHR proht if unregion == "asia", mlabel(unsubregion) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter proHR proht if unregion == "lac", mlabel(unsubregion) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter proHR proht if unregion == "neoeuro", mlabel(unsubregion) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) ///
    xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    ylabel(-11(1)4) xlabel(-1(0.5)1)
* The stored graph file is loaded and becomes the current graph, so the PNG below is
* exported from cities_sec.gph rather than from the twoway graph drawn just above.
* NOTE(review): presumably the .gph is a hand-edited version of that graph -- confirm.
graph use "cities_sec.gph"
graph export "FinalFigures\reg_class_sec.png", replace width(2620) height(1908)

***************************
*** APPENDIX FIGURE C.9 ***
***************************
* Classification of UN Subregions, Rural Areas + Small Towns Below 50K Only, 2019

* Country-level income variables (lpcgdp2010s*) together with the sample flag,
* stored sorted by country_wb so they can be merged on that key later on.
use country_wb lpcgdp2010s* sample using country3Dfull, clear
sort country_wb
save lpcgdp2010s_vars, replace

* Construction of the rural measures.
* First step: city totals by country, every aggregate carrying the suffix _city.
use city3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop AREA stock3D_HR, by(unsubregion incgroup2019 country_wb)
* Population and area get the common tot* naming.
rename totpop totpop_city
rename AREA totarea_city
* The stock variables simply receive the suffix.
foreach v in stock3D_new stock3D_old stock2D stock3D_HR {
    rename `v' `v'_city
}
* Sorted on the merge key and stored for the merge with the country totals.
sort country_wb
save temp2, replace
count
* We then use the data at the country level and merge in the city-level totals (temp2).
use country3Dfull, clear
collapse (sum) stock3D_new stock3D_old stock2D totpop2015 landareasqkm stock3D_HR, by(unsubregion incgroup2019 country_wb)
* The population variable is totpop2015 after the collapse; its full name is used so
* the rename does not depend on variable abbreviation.
ren totpop2015 totpop_cntry
ren landareasqkm totarea_cntry
ren stock3D_new stock3D_new_cntry
ren stock3D_old stock3D_old_cntry
ren stock2D stock2D_cntry
ren stock3D_HR stock3D_HR_cntry
count
* The old-style merge syntax is used, hence the sort on the key beforehand.
sort country_wb
merge country_wb using temp2
* _merge is written out in full (no reliance on variable abbreviation).
tab _merge
drop _merge
* Many do not have cities, so they are fully rural.
foreach X of varlist *city {
    replace `X' = 0 if `X' == .
}
* These are the variables for rural: country total minus city total.
foreach X in totpop totarea stock3D_new stock3D_old stock2D stock3D_HR {
    gen `X'_rural = `X'_cntry-`X'_city
}
sum tot*_rural, d
gsort -totarea_rural
* Checks: countries whose implied rural population or rural area is negative.
tab country_wb if totpop_rural < 0 
tab country_wb if totarea_rural < 0 
* We now create the crowding measures, separately for country, city and rural totals.
* 3D stock per capita.
foreach X in cntry city rural {
    gen pcstock3D_new_`X' = stock3D_new_`X'/totpop_`X'
}
foreach X in cntry city rural {
    * 2D stock per capita, and 3D stock divided by 2D stock.
    gen pcstock2D_new_`X' = stock2D_`X'/totpop_`X'
    gen avght_new_div_`X' = stock3D_new_`X'/stock2D_`X'
    * 2D stock scaled by 2.5, in total and per capita.
    * NOTE(review): presumably 2.5 is an assumed height per unit of 2D stock -- confirm.
    gen stock3D_2D_`X' = stock2D_`X'*2.5
    gen pcstock3D_2D_`X' = stock3D_2D_`X'/totpop_`X'
    * Per capita 3D stock net of the scaled 2D component.
    gen pcstock3D_no2D_`X' = pcstock3D_new_`X'-pcstock3D_2D_`X'
    * Percentage shares of the 3D stock: scaled 2D, HR, and the remainder (LR).
    gen share2Din3D_`X' = stock3D_2D_`X'/stock3D_new_`X'*100
    gen shareHRin3D_`X' = stock3D_HR_`X'/stock3D_new_`X' *100
    gen shareLRin3D_`X' = 100-share2Din3D_`X'-shareHRin3D_`X'
    * HR stock per capita.
    gen pcstock3D_HR_`X' = stock3D_HR_`X'/totpop_`X'
}
* These are the variables we need. 
keep country_wb pcstock3D_new* pcstock2D_new* avght_new_div* pcstock3D_2D* pcstock3D_no2D* share2D* shareLR* shareHR* pcstock3D_HR* totpop* incgroup2019 unsubregion
* Long format: one row per country and suffix, the suffix being stored in citysize
* ("_cntry", "_city", "_rural"). The totarea stub is not listed because the totarea_*
* variables were dropped by the keep just above.
reshape long pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2Din3D shareLRin3D shareHRin3D pcstock3D_HR totpop, i(incgroup2019 country_wb) j(citysize) string
* country_wb is written out in full (no reliance on variable abbreviation).
keep country_wb pcstock3D_new pcstock2D_new avght_new_div pcstock3D_2D pcstock3D_no2D share2D* shareLR* shareHR* pcstock3D_HR totpop incgroup2019 citysize unsubregion
* Only the rural rows are used from here on.
keep if citysize == "_rural"
* We add some variables on per capita GDP in the 2010s. 
* The old-style merge syntax is used, hence the sort on the key beforehand.
sort country_wb
merge country_wb using lpcgdp2010s_vars
* _merge is written out in full (no reliance on variable abbreviation).
tab _merge
drop _merge
keep if sample == 1 & lpcgdp2010s != .
count
* 204
corr lpcgdp*
gen lpcgdp2010s_sq = lpcgdp2010s*lpcgdp2010s
* We log some of the variables. 
* pcstock2D_new is written out in full (no reliance on variable abbreviation).
gen lpcstock2D = log(pcstock2D_new)
gen lpcstock3D_no2D = log(pcstock3D_no2D)
* NOTE(review): lpcstock3D_LRno2D is built from the same variable as lpcstock3D_no2D,
* so the two are identical here -- confirm that no HR component was meant to be removed.
gen lpcstock3D_LRno2D = log(pcstock3D_no2D)
gen lpcstock3D_HR = log(pcstock3D_HR)
* We first obtain the residuals relative to the global trend line
* (quadratic in log per capita GDP), unweighted and weighted by totpop.
foreach X in lpcstock2D lpcstock3D_no2D lpcstock3D_LRno2D lpcstock3D_HR {
    reg `X' lpcgdp2010s lpcgdp2010s_sq 
    predict `X'_resid_unw, resid
    reg `X' lpcgdp2010s lpcgdp2010s_sq [w=totpop]
    predict `X'_resid_wt, resid
}
* Correlation between the area and height residuals with respect to per capita income
* We use logs for this analysis
corr lpcstock2D_resid_wt lpcstock3D_no2D_resid_wt 
corr lpcstock2D_resid_wt lpcstock3D_no2D_resid_wt [w=totpop]
* Residuals showing specialization for heights vs area
reg lpcstock3D_no2D_resid_wt lpcstock2D_resid_wt [w=totpop]
predict proht, resid
gsort -proht
* Residuals showing specialization for high-rises vs low-rises
* NOTE(review): the dependent variable is the raw log lpcstock3D_HR, whereas the
* regression for proht above uses the income residual on both sides;
* lpcstock3D_HR_resid_wt is created in the loop but not used here -- confirm the intent.
reg lpcstock3D_HR lpcstock3D_LRno2D_resid_wt [w=totpop]
predict proHR, resid
gsort -proHR
* Country specialization residuals that we use for the analysis below
keep country_wb proHR proht totpop unsubregion
ren totpop totpop_rural
ren proHR proHR_rural
ren proht proht_rural
gen rural = 1 
sort country_wb
save country_residuals_rural, replace
count
* 204
sum proHR_rural
sum proht_rural

* We create the subregional averages.
use country_residuals_rural, clear
keep if rural == 1
count
* 204
* so weights = rural pop 2015
sum totpop_rural, d
* Negative rural populations are set to zero, i.e. those countries get a zero weight below.
replace totpop_rural = 0 if totpop_rural < 0
keep proHR_rural proht_rural totpop_rural country_wb unsubregion
ren proHR_rural proHR
ren proht_rural proht
collapse (mean) proHR proht [w=totpop_rural], by(unsubregion)
sum proHR proht, d
* We add the population. 
* The old-style merge syntax is used, hence the sort on the key beforehand.
sort unsubregion
merge unsubregion using totpopsubregion
* _merge is written out in full so these lines do not depend on variable abbreviation
* (they would fail under -set varabbrev off-).
tab _merge
drop _merge
* We create the groupings (0/1 indicators for four broad groups of UN subregions).
gen asia = inlist(unsubregion, "Eastern Asia", "Central Asia", "South-Eastern Asia", "Southern Asia", "Western Asia", "Melanesia")
gen neoeuro = inlist(unsubregion, "Australia/New Zealand", "Eastern Europe", "Northern Europe", "Southern Europe", "Western Europe", "North America")
gen lac = inlist(unsubregion, "Caribbean", "Central America", "South America")
gen africa = inlist(unsubregion, "Eastern Africa", "Middle Africa", "Northern Africa", "Southern Africa", "Western Africa")
* unregion stores the name of the group a subregion belongs to ("" when it is in none).
gen unregion = ""
foreach X in asia neoeuro lac africa {
    replace unregion = "`X'" if `X' == 1
}
* Checks: subregions left without a group, and the composition of each group.
tab unregion, m
bysort unregion: tab unsubregion

* Figure: one scatter layer per regional group, labelled by subregion, with dashed
* reference lines at zero on both axes.
summarize proHR proht
twoway ///
    (scatter proHR proht if unregion == "africa", mlabel(unsubregion) mcolor(pink) mlabcolor(pink) msymbol(circle_hollow)) ///
    (scatter proHR proht if unregion == "asia", mlabel(unsubregion) mcolor(green) mlabcolor(green) msymbol(square_hollow)) ///
    (scatter proHR proht if unregion == "lac", mlabel(unsubregion) mcolor(red) mlabcolor(red) msymbol(triangle_hollow)) ///
    (scatter proHR proht if unregion == "neoeuro", mlabel(unsubregion) mcolor(blue) mlabcolor(blue) msymbol(diamond_hollow)), ///
    legend(order(1 "Africa" 2 "Asia-Pacific" 3 "LAC" 4 "Other") row(1)) ///
    ytitle(Pro-High-Rises, margin(small)) graphregion(margin(tiny)) ///
    xtitle(Pro-Heights, margin(small)) ///
    xline(0, lpattern(longdash) lcolor(gs10)) yline(0, lpattern(longdash) lcolor(gs10)) ///
    graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) ///
    ylabel(-6(2)4) xlabel(-0.5(0.5)0.5)
* The stored graph file is loaded and becomes the current graph, so the PNG below is
* exported from rural.gph rather than from the twoway graph drawn just above.
* NOTE(review): presumably the .gph is a hand-edited version of that graph -- confirm.
graph use "rural.gph"
graph export "FinalFigures\reg_class_rural.png", replace width(2620) height(1908)